collapse/0000755000176200001440000000000014201453312012047 5ustar liggesuserscollapse/NAMESPACE0000644000176200001440000003715414201327077013310 0ustar liggesusersuseDynLib(collapse, .registration=TRUE) # , .fixes = "C_" importFrom(Rcpp, evalCpp) importFrom("graphics", "hist", "par", "plot") importFrom("grDevices", "rainbow") # importFrom("lfe", "demeanlist") importFrom("parallel", "mclapply") importFrom("utils", "packageVersion") importFrom("stats", "as.formula", "complete.cases", "cor", "cov", "var", "pt", "setNames", "terms.formula", "ts", "ts.plot", "model.matrix.default", "quantile", ".lm.fit", "cov2cor") export(add_stub) export(rm_stub) export(all_identical) export(all_obj_equal) # export(as.factor.GRP) export(as_factor_GRP) export(as.factor_GRP) export(as_factor_qG) export(as.factor_qG) export(atomic_elem) export(`atomic_elem<-`) export(B) export(fbetween) export(fbetween.data.frame) export(fbetween.default) export(fbetween.matrix) export(fselect) export(slt) export(`fselect<-`) export(`slt<-`) export(ss) export(fsubset) export(sbt) export(fsubset.data.frame) export(fsubset.default) export(fsubset.matrix) export(fsummarise) export(smr) export(ftransform) export(tfm) export(`ftransform<-`) export(`tfm<-`) export(ftransformv) export(tfmv) export(settransform) export(settfm) export(settransformv) export(settfmv) export(fmutate) export(mtt) export(fcompute) export(fcomputev) export(BY) export(BY.data.frame) export(BY.default) export(BY.matrix) export(cat_vars) export(`cat_vars<-`) export(char_vars) export(`char_vars<-`) export(collap) export(collapv) export(collapg) export(D) export(Dlog) export(dapply) export(date_vars) export(`date_vars<-`) export(Date_vars) export(`Date_vars<-`) export(F) export(fFtest) export(fact_vars) export(`fact_vars<-`) export(fdiff) export(fdiff.data.frame) export(fdiff.default) export(fdiff.matrix) export(ffirst) export(ffirst.data.frame) export(ffirst.default) export(ffirst.matrix) export(fgrowth) export(fgrowth.data.frame) export(fgrowth.default) export(fgrowth.matrix) export(flag) export(flag.data.frame) export(flag.default) export(flag.matrix) export(fcumsum) export(fcumsum.data.frame) export(fcumsum.default) export(fcumsum.matrix) export(flast) export(flast.data.frame) export(flast.default) export(flast.matrix) export(fmax) export(fmax.data.frame) export(fmax.default) export(fmax.matrix) export(fmean) export(fmean.data.frame) export(fmean.default) export(fmean.matrix) export(fmedian) export(fmedian.data.frame) export(fmedian.default) export(fmedian.matrix) export(fnth) export(fnth.data.frame) export(fnth.default) export(fnth.matrix) export(fmin) export(fmin.data.frame) export(fmin.default) export(fmin.matrix) export(fmode) export(fmode.data.frame) export(fmode.default) export(fmode.matrix) export(fndistinct) export(fndistinct.data.frame) export(fndistinct.default) export(fndistinct.matrix) export(fNdistinct) export(fNdistinct.data.frame) export(fNdistinct.default) export(fNdistinct.matrix) export(fnobs) export(fnobs.data.frame) export(fnobs.default) export(fnobs.matrix) export(fNobs) export(fNobs.data.frame) export(fNobs.default) export(fNobs.matrix) export(varying) export(varying.data.frame) export(varying.default) export(varying.matrix) export(fprod) export(fprod.data.frame) export(fprod.default) export(fprod.matrix) export(fscale) export(fscale.data.frame) export(fscale.default) export(fscale.matrix) export(fsd) export(fsd.data.frame) export(fsd.default) export(fsd.matrix) export(fsum) export(fsum.data.frame) export(fsum.default) export(fsum.matrix) export(fvar) export(fvar.data.frame) export(fvar.default) export(fvar.matrix) export(G) export(get_elem) export(get_vars) export(`get_vars<-`) export(gv) export(gvr) export(`gv<-`) export(`gvr<-`) export(add_vars) export(av) export(`add_vars<-`) export(`av<-`) export(radixorder) export(radixorderv) export(seqid) export(groupid) export(GRP) export(GRP.default) export(fgroup_by) export(gby) export(fgroup_vars) export(fungroup) export(GRPnames) export(GRPN) # export(group_names.GRP) export(has_elem) export(flm) export(cinv) export(HDB) export(fhdbetween) export(fhdbetween.default) export(fhdbetween.matrix) export(fhdbetween.data.frame) export(fHDbetween) export(fHDbetween.default) export(fHDbetween.matrix) export(fHDbetween.data.frame) export(HDW) export(fhdwithin) export(fhdwithin.default) export(fhdwithin.matrix) export(fhdwithin.data.frame) export(fHDwithin) export(fHDwithin.default) export(fHDwithin.matrix) export(fHDwithin.data.frame) export(irreg_elem) export(is_categorical) export(is_date) export(is_GRP) export(is_qG) export(is_unlistable) export(is.categorical) export(is.Date) export(is.GRP) export(is.qG) export(is.unlistable) export(is.regular) export(L) export(ldepth) export(list_elem) export(`list_elem<-`) export(logi_vars) export(`logi_vars<-`) export(mctl) export(mrtl) export(namlab) export(num_vars) export(`num_vars<-`) export(nv) export(`nv<-`) export(psacf) export(psacf.default) export(psacf.data.frame) export(pspacf) export(pspacf.default) export(pspacf.data.frame) export(psccf) export(psccf.default) export(psmat) export(psmat.default) export(psmat.data.frame) export(qDF) export(qDT) export(qTBL) export(qF) export(qG) export(qM) export(qsu) export(qsu.default) export(qsu.matrix) export(qsu.data.frame) export(descr) export(rapply2d) export(t_list) export(gsplit) export(rsplit) export(rsplit.default) export(rsplit.data.frame) export(fdroplevels) export(fdroplevels.factor) export(fdroplevels.data.frame) export(reg_elem) export(STD) export(TRA) export(TRA.data.frame) export(TRA.default) export(TRA.matrix) export(unlist2d) export(vlabels) export(vclasses) export(vtypes) export(vlengths) export(`vlabels<-`) export(setLabels) export(W) export(fwithin) export(fwithin.data.frame) export(fwithin.default) export(fwithin.matrix) export(seq_row) export(seq_col) export(.c) export(setRownames) export(setColnames) export(setDimnames) export(unattrib) # export(setAttr) export(setAttrib) export(copyAttrib) export(copyMostAttrib) export(pwcor) export(pwcov) export(pwnobs) export(pwNobs) export(whichv) export(`%==%`) export(`%!=%`) export(whichNA) export(copyv) export(setv) export(setop) export(`%+=%`) export(`%-=%`) export(`%*=%`) export(`%/=%`) export(alloc) export(allv) export(anyv) export(allNA) export(missing_cases) export(na_rm) export(na_omit) export(na_insert) export(massign) export(`%=%`) export(`%!in%`) export(`%rr%`) export(`%r+%`) export(`%r-%`) export(`%r*%`) export(`%r/%`) export(`%cr%`) export(`%c+%`) export(`%c-%`) export(`%c*%`) export(`%c/%`) export(ckmatch) export(Recode) export(recode_num) export(recode_char) export(replace_NA) export(pad) export(replace_Inf) export(replace_non_finite) export(replace_outliers) export(print.qsu) export(print.pwcor) export(print.pwcov) export(fnlevels) export(roworder) export(roworderv) export(frename) export(rnm) export(setrename) export(relabel) export(setrelabel) export(colorder) export(colorderv) export(group) export(funique) export(funique.default) export(funique.data.frame) export(finteraction) export(fnrow) export(fncol) export(fdim) export(as_numeric_factor) export(as_character_factor) export(as.numeric_factor) export(as.character_factor) # export(.NA_RM) export(.FAST_FUN) export(.FAST_STAT_FUN) export(.OPERATOR_FUN) export(.COLLAPSE_TOPICS) export(.COLLAPSE_ALL) export(.COLLAPSE_GENERIC) export(.COLLAPSE_DATA) S3method(B, data.frame) S3method(B, list) S3method(B, default) S3method(B, grouped_df) S3method(B, matrix) S3method(B, pdata.frame) S3method(B, pseries) S3method(fbetween, data.frame) S3method(fbetween, list) S3method(fbetween, default) S3method(fbetween, grouped_df) S3method(fbetween, matrix) S3method(fbetween, pdata.frame) S3method(fbetween, pseries) S3method(fsubset, data.frame) S3method(fsubset, default) S3method(fsubset, matrix) S3method(rsplit, default) S3method(rsplit, data.frame) S3method(fdroplevels, default) S3method(fdroplevels, factor) S3method(fdroplevels, list) S3method(fdroplevels, data.frame) S3method(BY, data.frame) S3method(BY, list) S3method(BY, default) S3method(BY, grouped_df) S3method(BY, matrix) S3method(D, data.frame) S3method(D, list) S3method(D, default) S3method(D, expression) S3method(D, call) S3method(D, name) S3method(D, grouped_df) S3method(D, matrix) S3method(D, pdata.frame) S3method(D, pseries) S3method(Dlog, data.frame) S3method(Dlog, list) S3method(Dlog, default) S3method(Dlog, grouped_df) S3method(Dlog, matrix) S3method(Dlog, pdata.frame) S3method(Dlog, pseries) S3method(F, data.frame) S3method(F, list) S3method(F, default) S3method(F, grouped_df) S3method(F, matrix) S3method(F, pdata.frame) S3method(F, pseries) S3method(fdiff, data.frame) S3method(fdiff, list) S3method(fdiff, default) S3method(fdiff, grouped_df) S3method(fdiff, matrix) S3method(fdiff, pdata.frame) S3method(fdiff, pseries) S3method(ffirst, data.frame) S3method(ffirst, list) S3method(ffirst, default) S3method(ffirst, grouped_df) S3method(ffirst, matrix) S3method(fgrowth, data.frame) S3method(fgrowth, list) S3method(fgrowth, default) S3method(fgrowth, grouped_df) S3method(fgrowth, matrix) S3method(fgrowth, pdata.frame) S3method(fgrowth, pseries) S3method(flag, data.frame) S3method(flag, list) S3method(flag, default) S3method(flag, grouped_df) S3method(flag, matrix) S3method(flag, pdata.frame) S3method(flag, pseries) S3method(fcumsum, data.frame) S3method(fcumsum, list) S3method(fcumsum, default) S3method(fcumsum, grouped_df) S3method(fcumsum, matrix) S3method(fcumsum, pdata.frame) S3method(fcumsum, pseries) S3method(flast, data.frame) S3method(flast, list) S3method(flast, default) S3method(flast, grouped_df) S3method(flast, matrix) S3method(fmax, data.frame) S3method(fmax, list) S3method(fmax, default) S3method(fmax, grouped_df) S3method(fmax, matrix) S3method(fmean, data.frame) S3method(fmean, list) S3method(fmean, default) S3method(fmean, grouped_df) S3method(fmean, matrix) S3method(fmedian, data.frame) S3method(fmedian, list) S3method(fmedian, default) S3method(fmedian, grouped_df) S3method(fmedian, matrix) S3method(fnth, data.frame) S3method(fnth, list) S3method(fnth, default) S3method(fnth, grouped_df) S3method(fnth, matrix) S3method(fmin, data.frame) S3method(fmin, list) S3method(fmin, default) S3method(fmin, grouped_df) S3method(fmin, matrix) S3method(fmode, data.frame) S3method(fmode, list) S3method(fmode, default) S3method(fmode, grouped_df) S3method(fmode, matrix) S3method(fndistinct, data.frame) S3method(fndistinct, list) S3method(fndistinct, default) S3method(fndistinct, grouped_df) S3method(fndistinct, matrix) S3method(fNdistinct, data.frame) S3method(fNdistinct, default) S3method(fNdistinct, matrix) S3method(funique, data.frame) S3method(funique, list) S3method(funique, sf) S3method(funique, default) S3method(fnobs, data.frame) S3method(fnobs, list) S3method(fnobs, default) S3method(fnobs, grouped_df) S3method(fnobs, matrix) S3method(fNobs, data.frame) S3method(fNobs, default) S3method(fNobs, matrix) S3method(varying, data.frame) S3method(varying, pdata.frame) S3method(varying, pseries) S3method(varying, list) S3method(varying, sf) S3method(varying, default) S3method(varying, grouped_df) S3method(varying, matrix) S3method(fprod, data.frame) S3method(fprod, list) S3method(fprod, default) S3method(fprod, grouped_df) S3method(fprod, matrix) S3method(fscale, data.frame) S3method(fscale, list) S3method(fscale, default) S3method(fscale, grouped_df) S3method(fscale, matrix) S3method(fscale, pdata.frame) S3method(fscale, pseries) S3method(fsd, data.frame) S3method(fsd, list) S3method(fsd, default) S3method(fsd, grouped_df) S3method(fsd, matrix) S3method(fsum, data.frame) S3method(fsum, list) S3method(fsum, default) S3method(fsum, grouped_df) S3method(fsum, matrix) S3method(fvar, data.frame) S3method(fvar, list) S3method(fvar, default) S3method(fvar, grouped_df) S3method(fvar, matrix) S3method(G, data.frame) S3method(G, list) S3method(G, default) S3method(G, grouped_df) S3method(G, matrix) S3method(G, pdata.frame) S3method(G, pseries) S3method(GRP, default) S3method(GRP, GRP) S3method(GRP, factor) S3method(GRP, grouped_df) S3method(GRP, pdata.frame) S3method(GRP, pseries) S3method(GRP, qG) S3method(HDB, data.frame) S3method(HDB, default) S3method(HDB, matrix) S3method(HDB, pdata.frame) S3method(HDB, pseries) S3method(fhdbetween, default) S3method(fhdbetween, matrix) S3method(fhdbetween, data.frame) S3method(fhdbetween, pdata.frame) S3method(fhdbetween, pseries) S3method(fHDbetween, default) S3method(fHDbetween, matrix) S3method(fHDbetween, data.frame) S3method(HDW, data.frame) S3method(HDW, default) S3method(HDW, matrix) S3method(HDW, pdata.frame) S3method(HDW, pseries) S3method(fhdwithin, default) S3method(fhdwithin, matrix) S3method(fhdwithin, data.frame) S3method(fhdwithin, pdata.frame) S3method(fhdwithin, pseries) S3method(fHDwithin, default) S3method(fHDwithin, matrix) S3method(fHDwithin, data.frame) S3method(L, data.frame) S3method(L, list) S3method(L, default) S3method(L, grouped_df) S3method(L, matrix) S3method(L, pdata.frame) S3method(L, pseries) S3method(length, GRP) S3method(plot, GRP) S3method(print, GRP) S3method(print, GRP_df) # S3method(head, GRP_df) # S3method(tail, GRP_df) S3method(print, qsu) S3method(print, descr) S3method(print, pwcor) S3method(print, pwcov) S3method(print, fFtest) S3method(print, psmat) S3method(print, invisible) S3method(aperm, psmat) S3method(aperm, qsu) S3method('[', psmat) S3method('[', qsu) S3method('[', pwcor) S3method('[', pwcov) S3method('[', GRP_df) S3method('[[', GRP_df) S3method('[<-', GRP_df) S3method('[[<-', GRP_df) S3method(as.data.frame, descr) S3method(psacf, data.frame) S3method(psacf, default) S3method(psacf, pdata.frame) S3method(psacf, pseries) S3method(psccf, default) S3method(psccf, pseries) S3method(psmat, data.frame) S3method(psmat, default) S3method(psmat, pdata.frame) S3method(psmat, pseries) S3method(plot, psmat) S3method(pspacf, data.frame) S3method(pspacf, default) S3method(pspacf, pdata.frame) S3method(pspacf, pseries) S3method(qsu, data.frame) S3method(qsu, default) S3method(qsu, matrix) S3method(qsu, pdata.frame) S3method(qsu, list) S3method(qsu, sf) S3method(qsu, pseries) S3method(STD, data.frame) S3method(STD, list) S3method(STD, default) S3method(STD, grouped_df) S3method(STD, matrix) S3method(STD, pdata.frame) S3method(STD, pseries) S3method(TRA, data.frame) S3method(TRA, list) S3method(TRA, default) S3method(TRA, grouped_df) S3method(TRA, matrix) S3method(W, data.frame) S3method(W, list) S3method(W, default) S3method(W, grouped_df) S3method(W, matrix) S3method(W, pdata.frame) S3method(W, pseries) S3method(fwithin, data.frame) S3method(fwithin, list) S3method(fwithin, default) S3method(fwithin, grouped_df) S3method(fwithin, matrix) S3method(fwithin, pdata.frame) S3method(fwithin, pseries) collapse/LICENSE0000644000176200001440000010741213665256122013076 0ustar liggesusersThis is free software licensed under a GNU General Public License 2.0 (GPL-2.0), and may be redistributed and/or modified under the terms of this license. However this software includes modified C-code from the data.table package (http://r-datatable.com) which is licensed under the weaker Mozilla Public License 2.0 (MPL-2.0) license. Any modification of these source files requires preservation of the MPL-2.0 license. The license statements for GPL-2.0 and MPL-2.0 are provided below. The MPL-2.0 License applies to the following files: src/data.table.h src/data.table_init.c src/data.table_rbindlist.c src/data.table_subset.c src/data.table_utils.c The rest is licensed GPL-2.0. ============================================================================================ ******************************************************************************************** GNU GENERAL PUBLIC LICENSE Version 2, June 1991 Copyright (C) 1989, 1991 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. Preamble The licenses for most software are designed to take away your freedom to share and change it. By contrast, the GNU General Public License is intended to guarantee your freedom to share and change free software--to make sure the software is free for all its users. This General Public License applies to most of the Free Software Foundation's software and to any other program whose authors commit to using it. (Some other Free Software Foundation software is covered by the GNU Lesser General Public License instead.) You can apply it to your programs, too. When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for this service if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs; and that you know you can do these things. To protect your rights, we need to make restrictions that forbid anyone to deny you these rights or to ask you to surrender the rights. These restrictions translate to certain responsibilities for you if you distribute copies of the software, or if you modify it. For example, if you distribute copies of such a program, whether gratis or for a fee, you must give the recipients all the rights that you have. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights. We protect your rights with two steps: (1) copyright the software, and (2) offer you this license which gives you legal permission to copy, distribute and/or modify the software. Also, for each author's protection and ours, we want to make certain that everyone understands that there is no warranty for this free software. If the software is modified by someone else and passed on, we want its recipients to know that what they have is not the original, so that any problems introduced by others will not reflect on the original authors' reputations. Finally, any free program is threatened constantly by software patents. We wish to avoid the danger that redistributors of a free program will individually obtain patent licenses, in effect making the program proprietary. To prevent this, we have made it clear that any patent must be licensed for everyone's free use or not licensed at all. The precise terms and conditions for copying, distribution and modification follow. GNU GENERAL PUBLIC LICENSE TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 0. This License applies to any program or other work which contains a notice placed by the copyright holder saying it may be distributed under the terms of this General Public License. The "Program", below, refers to any such program or work, and a "work based on the Program" means either the Program or any derivative work under copyright law: that is to say, a work containing the Program or a portion of it, either verbatim or with modifications and/or translated into another language. (Hereinafter, translation is included without limitation in the term "modification".) Each licensee is addressed as "you". Activities other than copying, distribution and modification are not covered by this License; they are outside its scope. The act of running the Program is not restricted, and the output from the Program is covered only if its contents constitute a work based on the Program (independent of having been made by running the Program). Whether that is true depends on what the Program does. 1. You may copy and distribute verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice and disclaimer of warranty; keep intact all the notices that refer to this License and to the absence of any warranty; and give any other recipients of the Program a copy of this License along with the Program. You may charge a fee for the physical act of transferring a copy, and you may at your option offer warranty protection in exchange for a fee. 2. You may modify your copy or copies of the Program or any portion of it, thus forming a work based on the Program, and copy and distribute such modifications or work under the terms of Section 1 above, provided that you also meet all of these conditions: a) You must cause the modified files to carry prominent notices stating that you changed the files and the date of any change. b) You must cause any work that you distribute or publish, that in whole or in part contains or is derived from the Program or any part thereof, to be licensed as a whole at no charge to all third parties under the terms of this License. c) If the modified program normally reads commands interactively when run, you must cause it, when started running for such interactive use in the most ordinary way, to print or display an announcement including an appropriate copyright notice and a notice that there is no warranty (or else, saying that you provide a warranty) and that users may redistribute the program under these conditions, and telling the user how to view a copy of this License. (Exception: if the Program itself is interactive but does not normally print such an announcement, your work based on the Program is not required to print an announcement.) These requirements apply to the modified work as a whole. If identifiable sections of that work are not derived from the Program, and can be reasonably considered independent and separate works in themselves, then this License, and its terms, do not apply to those sections when you distribute them as separate works. But when you distribute the same sections as part of a whole which is a work based on the Program, the distribution of the whole must be on the terms of this License, whose permissions for other licensees extend to the entire whole, and thus to each and every part regardless of who wrote it. Thus, it is not the intent of this section to claim rights or contest your rights to work written entirely by you; rather, the intent is to exercise the right to control the distribution of derivative or collective works based on the Program. In addition, mere aggregation of another work not based on the Program with the Program (or with a work based on the Program) on a volume of a storage or distribution medium does not bring the other work under the scope of this License. 3. You may copy and distribute the Program (or a work based on it, under Section 2) in object code or executable form under the terms of Sections 1 and 2 above provided that you also do one of the following: a) Accompany it with the complete corresponding machine-readable source code, which must be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, b) Accompany it with a written offer, valid for at least three years, to give any third party, for a charge no more than your cost of physically performing source distribution, a complete machine-readable copy of the corresponding source code, to be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, c) Accompany it with the information you received as to the offer to distribute corresponding source code. (This alternative is allowed only for noncommercial distribution and only if you received the program in object code or executable form with such an offer, in accord with Subsection b above.) The source code for a work means the preferred form of the work for making modifications to it. For an executable work, complete source code means all the source code for all modules it contains, plus any associated interface definition files, plus the scripts used to control compilation and installation of the executable. However, as a special exception, the source code distributed need not include anything that is normally distributed (in either source or binary form) with the major components (compiler, kernel, and so on) of the operating system on which the executable runs, unless that component itself accompanies the executable. If distribution of executable or object code is made by offering access to copy from a designated place, then offering equivalent access to copy the source code from the same place counts as distribution of the source code, even though third parties are not compelled to copy the source along with the object code. 4. You may not copy, modify, sublicense, or distribute the Program except as expressly provided under this License. Any attempt otherwise to copy, modify, sublicense or distribute the Program is void, and will automatically terminate your rights under this License. However, parties who have received copies, or rights, from you under this License will not have their licenses terminated so long as such parties remain in full compliance. 5. You are not required to accept this License, since you have not signed it. However, nothing else grants you permission to modify or distribute the Program or its derivative works. These actions are prohibited by law if you do not accept this License. Therefore, by modifying or distributing the Program (or any work based on the Program), you indicate your acceptance of this License to do so, and all its terms and conditions for copying, distributing or modifying the Program or works based on it. 6. Each time you redistribute the Program (or any work based on the Program), the recipient automatically receives a license from the original licensor to copy, distribute or modify the Program subject to these terms and conditions. You may not impose any further restrictions on the recipients' exercise of the rights granted herein. You are not responsible for enforcing compliance by third parties to this License. 7. If, as a consequence of a court judgment or allegation of patent infringement or for any other reason (not limited to patent issues), conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot distribute so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not distribute the Program at all. For example, if a patent license would not permit royalty-free redistribution of the Program by all those who receive copies directly or indirectly through you, then the only way you could satisfy both it and this License would be to refrain entirely from distribution of the Program. If any portion of this section is held invalid or unenforceable under any particular circumstance, the balance of the section is intended to apply and the section as a whole is intended to apply in other circumstances. It is not the purpose of this section to induce you to infringe any patents or other property right claims or to contest validity of any such claims; this section has the sole purpose of protecting the integrity of the free software distribution system, which is implemented by public license practices. Many people have made generous contributions to the wide range of software distributed through that system in reliance on consistent application of that system; it is up to the author/donor to decide if he or she is willing to distribute software through any other system and a licensee cannot impose that choice. This section is intended to make thoroughly clear what is believed to be a consequence of the rest of this License. 8. If the distribution and/or use of the Program is restricted in certain countries either by patents or by copyrighted interfaces, the original copyright holder who places the Program under this License may add an explicit geographical distribution limitation excluding those countries, so that distribution is permitted only in or among countries not thus excluded. In such case, this License incorporates the limitation as if written in the body of this License. 9. The Free Software Foundation may publish revised and/or new versions of the General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Program specifies a version number of this License which applies to it and "any later version", you have the option of following the terms and conditions either of that version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of this License, you may choose any version ever published by the Free Software Foundation. 10. If you wish to incorporate parts of the Program into other free programs whose distribution conditions are different, write to the author to ask for permission. For software which is copyrighted by the Free Software Foundation, write to the Free Software Foundation; we sometimes make exceptions for this. Our decision will be guided by the two goals of preserving the free status of all derivatives of our free software and of promoting the sharing and reuse of software generally. NO WARRANTY 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. END OF TERMS AND CONDITIONS How to Apply These Terms to Your New Programs If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms. To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively convey the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. Copyright (C) This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. Also add information on how to contact you by electronic and paper mail. If the program is interactive, make it output a short notice like this when it starts in an interactive mode: Gnomovision version 69, Copyright (C) year name of author Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. This is free software, and you are welcome to redistribute it under certain conditions; type `show c' for details. The hypothetical commands `show w' and `show c' should show the appropriate parts of the General Public License. Of course, the commands you use may be called something other than `show w' and `show c'; they could even be mouse-clicks or menu items--whatever suits your program. You should also get your employer (if you work as a programmer) or your school, if any, to sign a "copyright disclaimer" for the program, if necessary. Here is a sample; alter the names: Yoyodyne, Inc., hereby disclaims all copyright interest in the program `Gnomovision' (which makes passes at compilers) written by James Hacker. , 1 April 1989 Ty Coon, President of Vice This General Public License does not permit incorporating your program into proprietary programs. If your program is a subroutine library, you may consider it more useful to permit linking proprietary applications with the library. If this is what you want to do, use the GNU Lesser General Public License instead of this License. ============================================================================================ ******************************************************************************************** Mozilla Public License Version 2.0 ================================== 1. Definitions -------------- 1.1. "Contributor" means each individual or legal entity that creates, contributes to the creation of, or owns Covered Software. 1.2. "Contributor Version" means the combination of the Contributions of others (if any) used by a Contributor and that particular Contributor's Contribution. 1.3. "Contribution" means Covered Software of a particular Contributor. 1.4. "Covered Software" means Source Code Form to which the initial Contributor has attached the notice in Exhibit A, the Executable Form of such Source Code Form, and Modifications of such Source Code Form, in each case including portions thereof. 1.5. "Incompatible With Secondary Licenses" means (a) that the initial Contributor has attached the notice described in Exhibit B to the Covered Software; or (b) that the Covered Software was made available under the terms of version 1.1 or earlier of the License, but not also under the terms of a Secondary License. 1.6. "Executable Form" means any form of the work other than Source Code Form. 1.7. "Larger Work" means a work that combines Covered Software with other material, in a separate file or files, that is not Covered Software. 1.8. "License" means this document. 1.9. "Licensable" means having the right to grant, to the maximum extent possible, whether at the time of the initial grant or subsequently, any and all of the rights conveyed by this License. 1.10. "Modifications" means any of the following: (a) any file in Source Code Form that results from an addition to, deletion from, or modification of the contents of Covered Software; or (b) any new file in Source Code Form that contains any Covered Software. 1.11. "Patent Claims" of a Contributor means any patent claim(s), including without limitation, method, process, and apparatus claims, in any patent Licensable by such Contributor that would be infringed, but for the grant of the License, by the making, using, selling, offering for sale, having made, import, or transfer of either its Contributions or its Contributor Version. 1.12. "Secondary License" means either the GNU General Public License, Version 2.0, the GNU Lesser General Public License, Version 2.1, the GNU Affero General Public License, Version 3.0, or any later versions of those licenses. 1.13. "Source Code Form" means the form of the work preferred for making modifications. 1.14. "You" (or "Your") means an individual or a legal entity exercising rights under this License. For legal entities, "You" includes any entity that controls, is controlled by, or is under common control with You. For purposes of this definition, "control" means (a) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (b) ownership of more than fifty percent (50%) of the outstanding shares or beneficial ownership of such entity. 2. License Grants and Conditions -------------------------------- 2.1. Grants Each Contributor hereby grants You a world-wide, royalty-free, non-exclusive license: (a) under intellectual property rights (other than patent or trademark) Licensable by such Contributor to use, reproduce, make available, modify, display, perform, distribute, and otherwise exploit its Contributions, either on an unmodified basis, with Modifications, or as part of a Larger Work; and (b) under Patent Claims of such Contributor to make, use, sell, offer for sale, have made, import, and otherwise transfer either its Contributions or its Contributor Version. 2.2. Effective Date The licenses granted in Section 2.1 with respect to any Contribution become effective for each Contribution on the date the Contributor first distributes such Contribution. 2.3. Limitations on Grant Scope The licenses granted in this Section 2 are the only rights granted under this License. No additional rights or licenses will be implied from the distribution or licensing of Covered Software under this License. Notwithstanding Section 2.1(b) above, no patent license is granted by a Contributor: (a) for any code that a Contributor has removed from Covered Software; or (b) for infringements caused by: (i) Your and any other third party's modifications of Covered Software, or (ii) the combination of its Contributions with other software (except as part of its Contributor Version); or (c) under Patent Claims infringed by Covered Software in the absence of its Contributions. This License does not grant any rights in the trademarks, service marks, or logos of any Contributor (except as may be necessary to comply with the notice requirements in Section 3.4). 2.4. Subsequent Licenses No Contributor makes additional grants as a result of Your choice to distribute the Covered Software under a subsequent version of this License (see Section 10.2) or under the terms of a Secondary License (if permitted under the terms of Section 3.3). 2.5. Representation Each Contributor represents that the Contributor believes its Contributions are its original creation(s) or it has sufficient rights to grant the rights to its Contributions conveyed by this License. 2.6. Fair Use This License is not intended to limit any rights You have under applicable copyright doctrines of fair use, fair dealing, or other equivalents. 2.7. Conditions Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted in Section 2.1. 3. Responsibilities ------------------- 3.1. Distribution of Source Form All distribution of Covered Software in Source Code Form, including any Modifications that You create or to which You contribute, must be under the terms of this License. You must inform recipients that the Source Code Form of the Covered Software is governed by the terms of this License, and how they can obtain a copy of this License. You may not attempt to alter or restrict the recipients' rights in the Source Code Form. 3.2. Distribution of Executable Form If You distribute Covered Software in Executable Form then: (a) such Covered Software must also be made available in Source Code Form, as described in Section 3.1, and You must inform recipients of the Executable Form how they can obtain a copy of such Source Code Form by reasonable means in a timely manner, at a charge no more than the cost of distribution to the recipient; and (b) You may distribute such Executable Form under the terms of this License, or sublicense it under different terms, provided that the license for the Executable Form does not attempt to limit or alter the recipients' rights in the Source Code Form under this License. 3.3. Distribution of a Larger Work You may create and distribute a Larger Work under terms of Your choice, provided that You also comply with the requirements of this License for the Covered Software. If the Larger Work is a combination of Covered Software with a work governed by one or more Secondary Licenses, and the Covered Software is not Incompatible With Secondary Licenses, this License permits You to additionally distribute such Covered Software under the terms of such Secondary License(s), so that the recipient of the Larger Work may, at their option, further distribute the Covered Software under the terms of either this License or such Secondary License(s). 3.4. Notices You may not remove or alter the substance of any license notices (including copyright notices, patent notices, disclaimers of warranty, or limitations of liability) contained within the Source Code Form of the Covered Software, except that You may alter any license notices to the extent required to remedy known factual inaccuracies. 3.5. Application of Additional Terms You may choose to offer, and to charge a fee for, warranty, support, indemnity or liability obligations to one or more recipients of Covered Software. However, You may do so only on Your own behalf, and not on behalf of any Contributor. You must make it absolutely clear that any such warranty, support, indemnity, or liability obligation is offered by You alone, and You hereby agree to indemnify every Contributor for any liability incurred by such Contributor as a result of warranty, support, indemnity or liability terms You offer. You may include additional disclaimers of warranty and limitations of liability specific to any jurisdiction. 4. Inability to Comply Due to Statute or Regulation --------------------------------------------------- If it is impossible for You to comply with any of the terms of this License with respect to some or all of the Covered Software due to statute, judicial order, or regulation then You must: (a) comply with the terms of this License to the maximum extent possible; and (b) describe the limitations and the code they affect. Such description must be placed in a text file included with all distributions of the Covered Software under this License. Except to the extent prohibited by statute or regulation, such description must be sufficiently detailed for a recipient of ordinary skill to be able to understand it. 5. Termination -------------- 5.1. The rights granted under this License will terminate automatically if You fail to comply with any of its terms. However, if You become compliant, then the rights granted under this License from a particular Contributor are reinstated (a) provisionally, unless and until such Contributor explicitly and finally terminates Your grants, and (b) on an ongoing basis, if such Contributor fails to notify You of the non-compliance by some reasonable means prior to 60 days after You have come back into compliance. Moreover, Your grants from a particular Contributor are reinstated on an ongoing basis if such Contributor notifies You of the non-compliance by some reasonable means, this is the first time You have received notice of non-compliance with this License from such Contributor, and You become compliant prior to 30 days after Your receipt of the notice. 5.2. If You initiate litigation against any entity by asserting a patent infringement claim (excluding declaratory judgment actions, counter-claims, and cross-claims) alleging that a Contributor Version directly or indirectly infringes any patent, then the rights granted to You by any and all Contributors for the Covered Software under Section 2.1 of this License shall terminate. 5.3. In the event of termination under Sections 5.1 or 5.2 above, all end user license agreements (excluding distributors and resellers) which have been validly granted by You or Your distributors under this License prior to termination shall survive termination. ************************************************************************ * * * 6. Disclaimer of Warranty * * ------------------------- * * * * Covered Software is provided under this License on an "as is" * * basis, without warranty of any kind, either expressed, implied, or * * statutory, including, without limitation, warranties that the * * Covered Software is free of defects, merchantable, fit for a * * particular purpose or non-infringing. The entire risk as to the * * quality and performance of the Covered Software is with You. * * Should any Covered Software prove defective in any respect, You * * (not any Contributor) assume the cost of any necessary servicing, * * repair, or correction. This disclaimer of warranty constitutes an * * essential part of this License. No use of any Covered Software is * * authorized under this License except under this disclaimer. * * * ************************************************************************ ************************************************************************ * * * 7. Limitation of Liability * * -------------------------- * * * * Under no circumstances and under no legal theory, whether tort * * (including negligence), contract, or otherwise, shall any * * Contributor, or anyone who distributes Covered Software as * * permitted above, be liable to You for any direct, indirect, * * special, incidental, or consequential damages of any character * * including, without limitation, damages for lost profits, loss of * * goodwill, work stoppage, computer failure or malfunction, or any * * and all other commercial damages or losses, even if such party * * shall have been informed of the possibility of such damages. This * * limitation of liability shall not apply to liability for death or * * personal injury resulting from such party's negligence to the * * extent applicable law prohibits such limitation. Some * * jurisdictions do not allow the exclusion or limitation of * * incidental or consequential damages, so this exclusion and * * limitation may not apply to You. * * * ************************************************************************ 8. Litigation ------------- Any litigation relating to this License may be brought only in the courts of a jurisdiction where the defendant maintains its principal place of business and such litigation shall be governed by laws of that jurisdiction, without reference to its conflict-of-law provisions. Nothing in this Section shall prevent a party's ability to bring cross-claims or counter-claims. 9. Miscellaneous ---------------- This License represents the complete agreement concerning the subject matter hereof. If any provision of this License is held to be unenforceable, such provision shall be reformed only to the extent necessary to make it enforceable. Any law or regulation which provides that the language of a contract shall be construed against the drafter shall not be used to construe this License against a Contributor. 10. Versions of the License --------------------------- 10.1. New Versions Mozilla Foundation is the license steward. Except as provided in Section 10.3, no one other than the license steward has the right to modify or publish new versions of this License. Each version will be given a distinguishing version number. 10.2. Effect of New Versions You may distribute the Covered Software under the terms of the version of the License under which You originally received the Covered Software, or under the terms of any subsequent version published by the license steward. 10.3. Modified Versions If you create software not governed by this License, and you want to create a new license for such software, you may create and use a modified version of this License if you rename the license and remove any references to the name of the license steward (except to note that such modified license differs from this License). 10.4. Distributing Source Code Form that is Incompatible With Secondary Licenses If You choose to distribute Source Code Form that is Incompatible With Secondary Licenses under the terms of this version of the License, the notice described in Exhibit B of this License must be attached. Exhibit A - Source Code Form License Notice ------------------------------------------- This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0. If a copy of the MPL was not distributed with this file, You can obtain one at http://mozilla.org/MPL/2.0/. If it is not possible or desirable to put the notice in a particular file, then You may include the notice in a location (such as a LICENSE file in a relevant directory) where a recipient would be likely to look for such a notice. You may add additional accurate notices of copyright ownership. Exhibit B - "Incompatible With Secondary Licenses" Notice --------------------------------------------------------- This Source Code Form is "Incompatible With Secondary Licenses", as defined by the Mozilla Public License, v. 2.0. collapse/data/0000755000176200001440000000000014066067064012776 5ustar liggesuserscollapse/data/wlddev.rda0000644000176200001440000070632314066120216014753 0ustar liggesusersBZh91AY&SYt|2U(@0غ<}N{y}>qihm 9f{sww2\ݪQCwܮ>ƽh'u"ݧlU5ϻnW[ s=o/<_f.6FCvvtmwP("ERE*ET1E X 4 ҵfjhiIA*HPM6e kM>r;/E=8=:a9\#k L*rΘFlti 2/%`ujo{'/Cv}ӗ>u;{C;{oNBΞzywwqNӲ]6YNѧ]lms{۽뾗M>myk{z9{eZ˛v{v潷olֵs}[fݨh&& 4`&A@ !CF4Ah0`M000a2`M14i0h6M xPʞ M4LAL&@M4&4hL & A0ɡh&@ !44` $̘&L&Ɉia ɦC!ihddhF4` LM0H" 0ɡ11 @ 4 xLL>ASC$@ URd+ PTAR k{y3-/!(H~* NJ=<(Zϫ1+ĹArn ɀ!%@(@(QQQ@Q@QQQ@Q@uy <( 5@Q(GGFPjFY+1aJICX*T B%"rF@(`fFʢ;fݶ-qsqd( J[JL Z1DW0#T dR#EoȠ 0"aR!P``"qr$B*ȉE%4iJP*<B@D Z *kUq8ZikBUR$T+ƆO2bZUY52&A%Q  <ΈC5 "(jAq\-@U&D('"`E2BD:\auXCZ1b1@Q<Ԧe*&h%Q14D҉HDH5*$JtPQFD@:UJXƵXִ1UTb"i"$M F%"DHx FzJ@E(4L +iRjSZj #Phb)4x&J%D3M)M4MQO< B UPPTgQE2Ӏ TR+R@(f QbA\;/dk0 CNR>9ogTR!6VH d"rJ@+c 6ͬ[9+cjL+X 0V+P2`緂> ++݁ZS?"+S> @ ւ[+VZ+7Wd -ܲȮ ; +`W]6 +ʀ6l̂ls`.Wmvʹ ܁]ҞwV VjWEhA^`5pWZ+EH+q`H }.<+*r  \R+ynP؁\+r@WЂ0 \++.lu`IWkAZso +t@*}+i\+t0+Z]@+@+gT قZ+g+:V+o[V-:V ŇJ ڀW[+s@ci؂{$V+[Gfqn4V;VYUhWuw+ߢ+&} u>0+xxsiy@__zg_'mXYW -A[|8^z>x+ g }j`DWE}`V++\ ד{{@+vw|Wǂ+=K\V 9x\LW|T4<puqJ̪0prFb@T/T&}JzjV7xdG[G,X : 5Qˀ(@QˀG.P(G"((QQʀG*T 5QɀTQʀG*P : 9Puj 0G(P : : : : 5QGF~@~P~`~p~~p~~~aQl@DP ]m^ ֣e ױp4hgQ? bJݎ* +"2):L$K3L2S*dlOaDx4xZ%GL!ƿDbȘ*3 ~TiGv(L0PDu";%TeĖ}2Vr*+'ZQv ܊^|WEf Ԓ+2+0+NH@V@8+*XAZ! B 8+[0+:+:+VYkHEkA[ P `+) AZ`aUӂ `e+_H[VjYWl&ePQVR9[V'f e8+)YYX+R+ V@ZH5r+Z ldV@+ قa+DP Tf ۬Z^¢+4  ihVX HbZZVVu୔PV$VV`VJ+b kZ"P0+<"kX]+T+RL V[tEZV€V€Vʸ+nh ZV H+:+Y$V+8+4UbpVY[Kx+D୘+L hEd 4XV`V[}Z P0V+Xk!jfbhf jVTkj"[KJVdVr+ ViZS 1EPրQA!EQ.B(  9(ِ"x0"+mP0*)@*$`L7L퓐( #+_WI]Y+{E|/ΊK"&̊[fPsIMܑ^ @="Owz긞&dI$WhIQLL!BB%RgPi 埳 \HZͫgOKj|~X HK}JUϺ^4?ypS|s+kTz{nz7䖗E-ǧj?\bUhuN+{o"-MV acī.JQ"A٩RBTH$A$($UJU(UAJRJ*E%UQH(ATU%JR U R)RU$EP*@UBI*A JE H BRT )*)PTTU UTT@$I%APJHT(UPz00ZIEQUaBJ*%BJP!Da-(Q..p(9 97(wGzXbT}GQ ?/{G3zb˷A|vC>CCjfGsŁN2A*RU% RJ **A@BEHP UE%ARP*PRTPTeW!~+m~늇v[EBԲrun47 j]O ׹VSsuuB7ڞbUH"0zD΀@43_oOg6un820s*pPP&ٮ0Dbx*U襱V*zFFa"7d(;\xyת)N<̗YHzNQ؉ "(b! F't?eY-%{Nwy]^3q@_m;Ui~</ (3I$U*H RT)%HRU*B*TRfT+X%QC *CQX bcJ(BTPe%J4A(qa T Zq]5ϲ3z=gMupK֓yN:Z6z:0[|H`Baatf9ERr|yrOQ/|CµIRTa !I@ %C (PI%QJ!Bnf{tPͮ|^OI:?EI\/!xN%} ecte8F_}ΚY;|}lJbaD_%v IיNl~ V6"E4p'3_rj]f.4^ vX:)Xzų VQS}ߡb(Y 7RGD1Go?U~]sV?V6A@'D<5'a' b@VU PgHB?S;ʼ.8K7Fsi=U{ޮdJB% Ϟroh2U/ϭB3 }ƹm4&&Fu fn~y8M 7Wn\!L ;"%xo/Ҩu4HL[^Z 6 {[iRn< [x_kuxa[ݖȦv"CarIxT1@ T8f k;~/2}o_6kVtlSWѽH]!\@F]sZDWHm8\|Ϋso *`]zݫr@B^L$ OMFYV ǒ%6gM8sS);ϝ%'b̿: / }g`+XN3a|7{|UL#Rvި/:(Ѝ?sO^˱ݼ+&.Չ?(QBB@cQcTČ'$/%]ԡEM;OZ2>ϥFH:!Oi*fF;T2V wh(Mfy ^5Hq9)fӴ^ϩp٬o23I^8kZԻ<) Q>+vC'sϸYU dʄu}%2 D@?i\w} 4"%xOӹX$ޤBGCehEvTSʄWE|8@P:ZaUEvTS\SEv3} ^n|T(`pǚe,Td/1;s9rAndė{Ww.-ۊ5KTnװf~K$16w|Nl] eQm#K7gi0/Fg څSuzyT2t}|RupB!kP-m<H.5?vU3օv1`pG"%invz\CMv#0R9LEwhTSW>`H_*)mB|.6<{Nl/G:Ըx ! ?7'W#$b6 A ?w]> }?ul# ͗F^>s?8Z%R}}Q'pms/L &%2I\\$W5zcREo (=z$o>ˡXS}, 2RϢn;;vm6U9 [ZuƶÙ+H]`!"X1~:JZmV t$cbtDVK&jz@!=,"8^x\rՊ+"|BDŽ!rڌʍPLw*!"cQerYP);r< W֜e s>7> o}m[JfA^ơy1 B){胉mRvx4 ~]N0kzD߹\7CL3b-tW/Ez̄W}yWݵV#$W{_i"mTmyÃڶedkT_ڈ/ *o纟jL%dp(jcC61(XTpD}sĦ||]?5þ.{ 0X֘df1Y!q|b@;~~^o~̢^,W9E{lUq" ]۲"C!`2113.:r]+E8D=ovбGѲ4L&3>$[b;sRLScC66ݖ+X70a¼ `ԍ|H3zejxxQr,&“+X df 59[(*Km*dfÅTS`Y?Dbp8pp=VƷ"ie4DM>( ݒ"\ ۇ-˘V^.?<'7N|Cp])t*H(FUPaϮN8|)Sqk;r]vغ'mp[,3+ n}Њ}oURi~];2Zvpck1OspTd >@&EmQksDž{{ˀlk+⸴=J*znYͥXu3Y$o=}FC}hAD t\<)lB} >ODQRG<֬h>O,3wxf "k̄ ְ' }e4Uc&P!o b@w3v \:\"|0r~!f~iI L47=J.j'镽V/!iOՉ>hv~c?ʢ̊+2_Y>T]kl壻L2u?^!U3ZS) ?wyijȖ,J$0MEc aʒ;~A ף{Y!x@i} }=߅- 4ާc؞iZ3L־|GRݩr I\'u=)N_kL\w7 vܖ>rR /Sq1gݏ&wfVDU @ 9%PR@R?f Ac>-RTt> ͏W (+4sn22mZ?!H DLOI$W!+sEvhE|;]LF&4z(2~ !3隁L6<|愬AUX]<Rʖ`l0Ow6>GڂOt$Q=@~ _s&Z,ݏf^uZ1'!,;ncz7弖oq%=pZy;(˩q<+5mcxv;[ uƑ%Q/ ?kï'ܭ*&{C $*jkPu+-~օh"AY 0U7]i{: }XcgSMDE5[ ׬t\;ﶵnF~AwkZ̓y/o) NyIe/tm^=WMʥ9^?oT2PQ`qdv`\)殍4A9tNVZc-Ly9?w+Isvɺ_NcB xIF֎~;˯2 QL~Bxhu8+}?=󛋶w7eKwyI-O8W|p?5hw弩O!oτ0 `0 "N?؝Lk<2us-kӭ _2|]O@BHW+:ѝQO>] H!17֑3/F|TБ{RkBey#FKcrjR/|?#YϵˤSD-\2N7L=K.A%U\I8CF"ǤѫtOm Df9b%aU7#rI]B&A|BIfijReSi[q[WX(_;ɏG;VGGi<_@K 9D`|QovlpXnGf~ravձ U{T1?-fMi"\֗V4fcҨ{kl#UR21"}M-=󶆀u8g=Jo/(yo%R0/1-SL3yi}oZ7t_5hʂ(`Q};xmSKMo#=dk{50pr9g$ōymh!T5U(V9>;5yhZi1i~3*HD|l?<ÿ(F6}mq=`$'F@B{>KhNdL*;AE)- D/Eĸ6޷-ױe%d 6>5{H{&hJ*j⮩A5|R^÷'ط{ K3U@mR*kq!b9-sEg&SD(Zgc_+T%⡢iY8xwDh*?쬌LƳ_Q'̕wPzIb(U <ǒ;ERpޖ^LD=QP mTq*_\ϰm^KMu-ʥ'DNtj=Mh*h}}1gz)EsؚmT49}uQwLb ѧblP Q8tQ c-khgDv8a|F;;~k-*_"9 V׼s !`<46^acqjˀ.3ߞZPNn&+h?vOAh7(d;2k'i/: ~ז@8W츛.ϽܼΟ'qHq\6ciY9-z~Ff~OWp_{Kpb:r|YU8_:sDIu{蹿K79Kz`@)lbq8>:VO=:82ZF>Bqlmyb"wDwUGٱCP2Hj elz Mq_ 2KI kjOgʬ{e&#ê ^3'3EakUԤ<&ЬO#m\\<+ [< C3,@a/BDW':q%T=ŏ豚m>M!|=BqN*0=}G,;M.  a)-k?n[n_7kK@oɆN~';tf.P^C_h&YC'C/#-:Pi_h{ bwY`D-L^nT3re4{/a2x1U+/ֿڞ,v˃\7D -j z=9G7tuX@>*:Bz =Cm,|g'&wm2lO> V ȁ{ק {HTaX MŊ&q% FYCY'7%:X|z /XT !{itZGD.)Eш1lΎOYk2D11͖t'nƿLf]A`//ŒtJ ړ7[%c*[kzm O8~?Ã24wƢI,)^!bShal@y¾%1`ԟjvSqx`|d0?P<Ugh_M7{-ى3L/i׃^°pd==qǴx2/>_nKBxᥓq{ ǶkY}0Sw&#׃u]_ͩ, }7_*{bN>%ڟٔ2W4w !xq[QST0׭rSd^&l2~cWhOF/Gn[Z\xDDu$V@`*s>̳bͰ$o6 ~%}ƅ#;cM4spGy*ػX86_!(_ b hCJ_&P*PI Ls'p;}~ag/ŷrkъ;>tK+=xݵ,pgPX. {:}(gEݶ)>aM9~׼skek!ǽ<6  )W¥3]tIy#~saXęA3/J1n;Lܚ5sԏͅ8zNz9G:Q1^ӹ鬏za@+ig;\[2^mt( UU](U,St{28DCuVd+McxG[opB~y|H k n-c:]{-X~rsŋOz`䤴P g ʀGKZ dved69]iQrY}Mjr,GouDV,'jV~-nNN-Jߺe3NkL~Zӭ_ { $5<&o? ~e}z)Tճ7>Y*^ecc|=t o}uEeFw=Z`JG{)AFIfI]߄Es]!Iى+7oKa8Q[/Z.īϐ0U xB^E>١+(O:kN@mGVVhB]*SYr}pkVU?GtLk',,p H d `#Mǟ#]Ixo=nlulkުorO=׍L\7Y,k 9.$uϱNc47I} )mk?w˳3A|!Obx`!)g@L?R&q9'oȬY(Kǣya턣0OyGw;|{kwWƢ]ixMվRTUćBMQlY h㝛jxwJwJ;t(yJ&w]ov(1Ij=%7[k ,A_|V<1lם<'߽ա/XL6z;,bx QiO.c};q.~NQ!O}tS?:cD׹)U7Ϝ^~ułTgG~4 b>{JH N;39N1~McI.C_kBN8DeC饔s'[[vt= sX:z׵J=OzlMOm" 6y0%A=IlhPu`֠ٴBR:kL6ah2Ƚ5K7>a7q19 y#BMsWGBMR*/SpxJNCTrU= ,=پg55SKQz΂Z=:3;|ؖSV7 &vWc:,ՏP83ΙVp eʶIi[ek0Si?Y0u,xMr?mO֯Ջ5@)Q roG?J#㱶\y0KZ4,yRڧ.B'yHeӎPr|ݨR-?s;{ 2zh6Gb8:H-ץFl ֹd脆zK:xIᷲn;Kԙ9俣9$9Zқi ŝޡ$p ;=:PyScM??AuynzAՔz#R>f|w=Gp[tұnC_}[+I+ V$V> IJ{a_X{dVn/cm>P_uk( <_oW3NuƶZؙ0jϹNJ}*OzMϭa//}mg8Q̙;e1Qڟ-#wbS!ʤ-&q[>} ps-įڑ*K<7bj+/"Yo؝Wv^2xџQ\7/4W2n Ϡk~(g#޳k\bʀ@ %"Rg@RP[hf֫VRV,hyҧT!.].5:u8I6r2掐Vh!B[[(BB &$ˉy)_uj~; \/}0b7.m,ޑ <{:#\oXI48hA遆u"@t[aՌ2`ЭՓfMLoW,dw+Ct惕k  G! "3]yͯ g˽Qc2&'}vyVP)]Mukto"b+آEp6`>.[}kYjҞq7۬> Bi.r%-ߓ#+}m嘘2WQH^Vgq׌mAm<KS@M?ܩY颼ɠ[r;>DqB['L/D0عIJZ)FkDCЧ*qxL?qr[*HGnXRZq9?jE_!yg 2:CW~8[GJp' ksruϧKy~Ir=\E56m)J+cu4T"E$03RHi.K[S(rbc=&k~<,}Ä9&Pt_XR|=S{: @! JLru7;r =6j"]=P?y22;x|1Zux3X+)w՜=B- 9w=ɏpq:$xSA)ugȁ0B*D hQf)8}YM=]YvO-pBfM7QOf _YDNnJUz@4OEu4 f3^uӣpJWWt}wFG A B.vmPW&X4]{|`Hf~k?B9ւu@B/\ZY\L_ AN?ΜYoAz;Kݺ_,:B"mgkX|WT^X%:$@ xpHCn  y9{;K{5hIxA`/o05/-JE%v"`a)QnɂQ_|i*tejgHa/Ol6P$P8AȊBE~D,z/ϴw{=\qu3Ie_qw3^duG[VO"A6 [H%JYV^75^I(8"UXR()fK;j}3YK'6-b)/IW`) :_I  p XqH()X(RC IB *AUUPUIP !T4$37PH,Z-ShԹPƳO}HŌ+ۙzR"$* #iUV}8/8r+)L\t7˿-㌨@^~xq_< [#9O휫8*IwMҪ_+0HQK)\S'# >-O;dLC0)'|j'=.6co٤{\ӄ[dovV2iDOS!l.tO ̵e)MhO|$fxUe9؅q^7G S,4;'icG5}? 0ן.wZ6BKqc^%69R덯[SƹayoGp!o^7B3v 1@t a_}Չ73DGۃHiBY_jX|MYӅkZ]LUsdI)yTy:ĉ7'cۂn6,R\pJrsSmSw@! o#ͽ7:]txo,/LFN4͜oñ,J@LpCU+#/RA%LkTRjۧVwz]&ǵL2PH*"?t*ˆ4,kƫLe;ez\Zu4r45x䮆)]~N1nű1kN㗇i1Z^S}6)ζ)d%%SIݔOE+ZG@9 瀉 `S+t1LәFԣKQ: >5cd˪*BO[cU"13l=sr//*Y4I/ح J0fƣYMp8@0>Ÿ_y Fhʮ!gXM_~ r߷~cwXtS p=plq3!G|ևybsHB;X~/Rk訰opiRp 7>ZGS|S"/?׾jX:>Zy]+ys:/[X\3w68opn9 8 }<6U&4[)G!Z#Ȼ[еG7JU=gpd+#b@RCRsԛS|^nؐfciZ<`ܟYuHŐ|8MBVryϲg?aWiS%TRGĩ/!\9_su:Wɺ G~@a~&iZտfᗦ m3-qv6DDr_*O-V PcH?FvqԈaob* :=Z%ؗ}|m/P$W<&.%*ݱI mpjЊ xzhRTe!*ܭ3 &x᳿ߦ$pAݡ͑_xQ Qy&Hzjhz(=HqgqIPI.M`gIV lG]!/sįAylYo>BL@]HjobId``Sinvݦk1|t wWl3c3 yO+#TjP26T.1ԓBd< AMY6|~@0X컛Εv'R7/GLt,Xnl*3׏]} &9L -MrAa EVnhܟ@! ^szg̽9%&^"Kh=(`3wJy7%1[@@x0 H B h.iy}3Hɿ]*7X[ϑ}wM=B峂; oU1Wk#I܊3N4<Ī×R⼳:x $];) *"c Vd J&@>l h=>7Rw=fUD%}1y@ `՟zך7BQMQ7Na#@ljx]S}íuȶ>2*xLK½~<բ)e쪯BZ'iغ o'ܜR Ľ^Ի&<D̿w=$8Fͩ6u# k oNBfSb <`cGԛ;--oՏ>y {w1K{i~ˢL {8)!] hنmG&sFpoFW&`k``:cڠ @! Dgn0jx,78޽6 2`Jwćm90/-;ЌP f@;I@3c ʞ}r?k'u6~ "Խ߾^eӴx7:#2[mPj{8ix`&z 3ADI}$窘[!ѻl>D5 m][ H'V=Q=kv] 9U'w;遇Lme#Dg'rܗ4 0oWKMɴv---IԻlw{ήÈDU3?g';K]c68h%ldsm/;1j٘ʱhx? WURbdlB_3'K6HܢM7$vYz0nSuJ.Tq6R5we!8d?O]ߢ9)UêWMХλgp̩!2[2PZmǿ񿂿 K3a 6m6u3;g|l[Mw2U;-jYYʧ%_ QFOj)3Ϸk _0ײ ݺk΅{V]@W򷶗kZ>48 >_-렢t Aȷ\j,&Y@!Snng/G6BLn2A0_#m(J/ hT޸a]:_rD  r-9/c`FATV@ P3Q B ! =OKaK|c'~IC"O.Z7hG"=|[2CNMr&<~pEYmo,6Bh,Qf3_gxzA|C! ހ]rG\xMM`gXa`T҆cڠD0Yg+N+]I1F)'Ke -!򀾳լ?11BŔS"O!75 C itƬтk&Ԛ (e&DXSG4|,&:Kd] "IUZ;Rپdg準{ϐzU.L: R#5H}4hl7KhvCūN.(cnKJIb/šRa=F\\XZ 2_+-=̎ 7_-;oiM*Zȣr(4R0J|˥lgMG+$ML.aO$jI+Pj8zg9>HHvj^q< 9&xh=]W#VYǟ1PiРXԦnE!=aqk1q5n-Б*$}D/+ PqxѭK*dʉ8f@Hq<0pEŃN}TxYI/c?V&fF^Fi iMU瓕pLٴVx>4fJD^&QWTE9#`> p=Th>%?p [Y () \9_XY"x P*n0G4 A5&IW6 . rx& $2R P>"x7cmJJ vv|؆o؍b tgg+\5lAſLppuaeY8iM TmY:.h9]<(rsTsSD*8t¦ם!u^|'u0TØ_]ST7L> ّWRV(\U3wSoF -0kL@ s$;iW V2kj/4jjLzV7Cd͜HMx.ppfF7+Y*xx2rV\f+V3 0=;suI)LITn \<5qttűU^i¶oޢ^וIR CcqNkW,|D@{ )ޕ L[#`BbT{uTvh PWI;1`8VRj<dȆ@(Dn%h6$( j \Ș؄$-y1UVP ]P9| ]e(E1HDNVWSIļ5PyeTITA24,“'9Ät+(/׃JmI SjCfu-I&ȀxGj0ۤamPS1bRp9H&iˍ&ѝp),Z]͋, !>Al⾉`% BF"!uG{t@a?AmQ"!0!\fΞY0ͺHfX2TKA88iB3J3k.z4cprxǙYbs2_E4ARf%eǸ6Q8X l TubR /^0fdN.Bb,lRL$G%@)~9 Y?x`Ts@#AV'f dA*ݜb!\Bf7D(ϕxRl'gPQƧ-G/GyjeQ/H ׎8 x)qm^儇+]<ԵV8u Fa705!UP$l#"\R8 1IԄ [\%]e/:z~6UP1*͞6Hv5&qrGʀm9SJ 7j1u)I,2\0b·d0E 7%"hywقuP]HeCPVfjjBZMه.ǫFhLZ%[k ̭ppnaWb@Q KSC3#?}XpqѺ}}@x7Uۖ2G8KeDӷ )d*\ɳt2w2RVd nT7NAӍdnՀ*+7·+lGfmbnAM3@NE)*%&Tdb$C1pmCG-9zôs*بJ /f U$4jFZ) ijApPnt@hf*h5(VYKQX5tU!Az Euox7- KjCPỔe *h8l*,":=l3KP2] hd[+-V) =sYEDgA(4(+˃Jje-Nɷî r`1Ki%ԁD!:XJa3^5M9%b GlhBz@L?IB||EhB{&2.b41Q߭[znكN;HRka :8SV*QJd)]/:"Ȣ:*+ v$pAf%@$4Wv6-|T*ኪCFȏt*R˜bY2C!*6^F0Z@̏Gr ,+`)Kʤ9dVh{hJ{Ş;,fG #qŸ#NSD%KmڙC0S̜ O%b{g ֻYkJ7e$d\X-o}Cw2UcZ:L}aO4k7A H!טpQ#gaa &AmUKgCA|[Kxb YkIP͏{J{*c3pFlȼ#\ZKk0%ab6(H6XF͝q(QqP=Ҫ F .vQ?PssH t𪼉^W^!.mD߱}HS8 !'QD⋪D Vj BG/ўEtP$ Fޮ{I<#!oHB+*:ktV N&IZtv7+J \Jq/rt](S5;êAn坫1:Jx*~P)Ufr܆N.UrC+"C̈YLm/b6E^cB::!b䦔8⢘5f3:1V5$kx@@Z*FCI4sQ/%%b"e©PWEEB*|3n]_8J[D耸=Tjm(e0{Zri$X5W*tြUyYN'O*% !ZD6d M4PAė2o0j9Uk>&,-|픃TK|4Q..ԏưk/gibRj#/fJ e/ IHX_.\ìrԫpMxtrW[.{ES4s^,8b u.*d%o.(4A׬A,^ xq y-Kz+ԯ$R>Ӊ50LN^,آc>Ǯ*BNU(nu/hE?*~QYO jFz o\9t0 ׬A71P0'jEycJ&p8-T͗SnցԂO(l.lLK?L/t.WNM]dJ撾Wp&@!"!J`% L싍Lr6RZccF_0>'@ew[POy+ҫ4 O4K//Y,K(!40EfC}Su&svD׬7j O>=!CzKwUU$!3RqiQ{MC,w1SV9JR3;AMTKbrG'YV]PL*߭s!=Y1_NhwzDd]N+1;I+ | <)ga8puffw+h_%^ [0!Q탾@GҾ#f .œD^ݐD7,e8i_RfUb]9|Q?ZG^3WªhIf*M~Խ#|~,RLNN_rBN mGbE[~*QX eKqd 3cZ?JH:)})笹i9bF+'Q1G)!~(F"e 9hyNl}8K5Ki4؃RG vip둌zgZb[pcZhP׫QB ' On4<~5od\lD𥳐AttKܺ]ѫ΄H)4F7hgU2B⛶73Cܿn99[JMlz 6G[OqelW5&npT"Z$^y!EA_U5DN:i烲v-BbK1QrA\%Hb*jVnTd龑T؞Ž=uzm9=QWmpW {_GuѺ"pT Sh ߬׉< ʹѷ>W#7O='ߘ\>XXQvj02m5t +\<2F_U!55D_9sZD*}Q)՞~FAN'uW+f}/OK>Xٙ6 w*m2j)24S]gqy_j@3D^Wm/ZrGAM _/1>Ն}50/m{{#ޓ`p0ЗY@:UXc!TpUty0ɮ?l8Krrj:X:hԃ뷢 ^遭xUM90#OsrK'3~P['w%5dwC-ˡ-ًC~nUi+qonl(~8? D7ӵ6Xtݭ8NS](X2[pSZ,/my|y. Uw4e~{t0pt{ݙ`x5.x%Z5Sf6cc0Ru~쫹 еwl^Mc粼NO*3;Lv>TZ7Ǜ`>tؐH\;" 凜2oL~Y;LyɊC.㒸t. 6$%4Fx۳~E.Ы;M7Z=!nQ}dy60aN~yOrlc0KЗajE6.dxnnHS8UL8NɹYoRgbN X!`EaU#fcppiHz, _=1 "lյhApxw\>oK㲕u%ΑS 'i?*v*. Ϟsx3F]=7նOg7mmћkjfvko tpJJsvWRnU,>}g&izs߯rY vco5M\Xڭ>R~ S?b V_*tQq\>bN|J$[wcdp){:MՆyެ.狩 s}*d,t~b݉n_6RMdJcdRѭ5>#H8ӽ^S"/3 8;TR[eZ#%&姨;[<3w[^B+z#V\qڛr;&0iC_&+yT1 b ovuQG|d/CA6T^m=p6o1*1}]Շ/vܷ#iڗ?j#Sdz-TGFW~sv?C3| 2/a1ۛ(> Դ1헯LZ 9T6dǛҮ]c"Sa=Gko{5DI0;z[b2@ wHoEm שmGuǗ_hi&,4&bv %Wy\l/ίIR7@sou ;<]6.:ذ k=՜r}6XnJfDhOai8;4ӳs[-_q1?3E:Zse _,fiA)QWT󯱻>Uɹ RbEe_7W:휚fmU>A_~ ^+Mpif.YpmHop܋5@+y2K5wOԉŘQ}쎟w$jɻ9͆X?S#e70l7/׳uW"@ ~i#a";WnУ'PXlVBG߇iצ⸣U.c<\*-$V|\Qp(Mx{t>rm Q%L}DZ;>K)V7+fk.QoM$`w!j&v]Fx!asNpsUR-)5Gr[O7Nx^;A(Zb覫ùw],зY*TS"[PS>6,>颎=aE6|E*KnScYh+gʛUبRXh0 ?2wcS }+*| #N7v"L͑+#CVlI-E/䄱l=֤t"LQGxy+-L蟍[ _I7&2nͱª[9S*OmD6Ê)PK5 bXd.ɗ3sIɶwtZ&- d;#bQ3rN!)Y&.tLF=8够!!*&6kVǾ7[.\5q,N+dBiޮe"=yAPyb@ͮW/U4.@?}<.W,xr*4Ǭ:6R" )~a[68 ƛsE}gjT)z ڵa!zGP )pɭEe, `IdQQaMO7i$ Ń/)-ZfyD;cWW~JzĖGb[53hb #j!8<;w#W}QGF۰bՠ+90֨Uٹox՞Cy~8*8)}LeD/7Bn](~dWR \e9cZs1̡alUw9Qa38Qn:?N4L4Tg?*vśr4~n=lnDoj5%gBTǁg!$\J/YaXEp},dLv:6;bleD .&Inü{w6ꭌR(yǜJ\r%w1S3}jvYd8Ĺ^g0Nc@U]k֖_O xȽ.'8 f1T,y}53q_o$wlF6krnW?;"rKZotU9<wӰ<,awӼOb8reշݿ(J.> u&&L*i1T\K^ ?T7;){|{0{w$= & tc@}UJ)}͘$4<߾aOġFHVh CB=4iMjd9HmX( yzK(+f~v/例.=eVF-/Pȭ:9&o|?2BD!nYW q1.3HE;]#UGC)rLx~7X:f\7շ 4+żJ*:&l#?7ik?a܃]E:tldxKIQ昞[6Jff)B7j㾌Nf,jfq"b啂O;6\h hr9&+.Gq/j'Fש k8Hto>u_^\;I՗i4qcC 1 s?y9S1 Ƌ]_54`C^}N{&W EU+pX_Lz9~G8>U˼GRʋڹGݺ2G|Zn>/]GWgthh[> H؋|_Qg%1mZP̂~MR3`|aS_DtYs/* >1wrsjW-@,KǍd]Uc%?[%VY NQH9)Kl;}O2\Ց+8q{6:gxR̞nKW\$xi%Z89His_\g&׈=R&`)nZ27e'Jy[C/ f\Z=m /ņ7}) ?4CjN/su31Nϵ"-EZ>/)2x41|>͛?ťdkr2]oUT,0nu9O C~c *"ےbDnS_Y u"SNk}d z)8#W r[gM:S}:N-ms}FXfm P!EɛKPVrC/wGqiS4 QyܽEz+YZ9@~|N *c?jXr\]+i\7|0ZhXYg('2=ғ.2IZûNMrUNW-["0/ :Z*?-覂C˛bFiz_)\zWKZe\eCηPeg"WIiX]T ]gMDL ^/^qT[6URd$!=єF)b%XyM7\DIe"=js9At9%&JD&xYn'VRvkcU3dkNcyF'-\w8C-vciL|;Pqoܻ֙Gjĕr3#(V-W)=>"&%rD1>BUpIĤģh柡K9jˠ+1'?ӭ2X^Lܰ9m?kATjsl }JM.xH)(tJ]nɾM 97|D6٢ݺ杞~O!v!K6jcD/!90_' JcUD,W8YߧtdhBbg~v%3&92$ȷ]x ZVAA@9GI#6wF`cgNٰF3μpr?mR NBZ<=jG}5oT[;ǧ}9*'uƶQ;U&A2ky􎍹ɍO'9ӵ wh&r# 1DE?BM %Y{鏇w=Wnwydsޣh-j{W5_)?/ʣK GcRƾZ_1.Y[_n5DJ:{ƨ]SэG_"ygEex 'VqtYתM[;\>\giB$F1xJj^Ej>,:IK`a{?OCo1=ddr<>~V%/z273o[S{Ug4j:co5o](@޻wc2H0蹕<, T{- _KLn ZQ:u O+"G}+iĔ$7&&K@v"?JMΣ(C\傗D=|8~?Uk;6b"SG?iܳ'џc?hfkZ --e ג;3?Vʜ;.|bĎԎAkd̹l e{I;Mոy>*s"˯=)iwgZ[%upEg|195[g)K IӖtJZ i_toφLկev'n:W1EgWQzNwьhVLT}yLŗ΋ozZkdւR*ٹ-<- YlϖS-1f4ۚr)gzͰ RWe0#1=4P|e(9vcPt%7y|0#Ve-2fSd8㷙?K+В{1dp;.-\t6;j9}}QH?IAکyiHtuS+1YNmfmii%#ߩ ؙͻiW! ӜU#f5.vb4ݫD/r>s&73_ )|鴚hԻK;3I`}SF{1`>iS^݌GқZwMR~cR_ErG>Aǣ%uI:ipw7i[9ypye^ _ D!^Ml4WVi>6dBV > ,_2cgvF;GnQ/ mOy ћ]U:ɔUKnpmHyl ʳbO͙m rΐ5Nq67P+7[^d;ާZ땄g!:CVPe7㙦;bzriUSYnh;o>bO)l8a<qwyY4/V_"Ųvc7.KD]T~ç?#8lCZ_CfW1;dTooߠ|U.o2ok\gչo֜tr_t԰(bbs,IMM+w#+/8cKldgm}3K&OѢزwmzol'4h]AX73pUFUtSvSq9G,V;V0* ,e]e9j/uoƁ^@; Ef(L׼Υy!WN]2Ssf^sR޸uən?"<ĩAQ}Ű%.K6 3G+V0w8,uNmSW=?#ė7RqTunWjM\_+DzT6uU}(oFX~@,um_e77qE.tKw9t\n> Zh7:nW_M-lk;&uo%Ldc ~f i HTܯZ.8D'&C° 6t=&Z]Cÿf{ ޫǑ~?`ΜErFZWγ)f }*~| Ôdf c6 I(֥8/\3^+a #7dM4kgC=? q(8? l 7[]7o kjn|JG^ c6xns.ڟ&E޵_!:ٞŶ1oJWq~pzgwfub5}'Q,n'qb;AMukea<0kKK|1eK03b!9WN@BcMW:OlV<ƚiWv G{m󨎾=o9eD糇X݅LH̆2 q(},_'F^{yeK[>?Z*&aI;0𼻨çMK׃GK_5gƞ=m{ Ua &ؕ$?k#P _|~lhivl2Ӕ?Z GpZG U?J:IJsQYCfg3ٖ;PCA>j4:P7Bg7A_".Sl^J /w$^䜓IlA㖩1x{Ǭ\6s^O[%QC/22_0$TE!cds lF2II~XRH|]˧ TK*Qw'̴>EҼl\?GWU9a ҃lѱi ]5mͪ֕L\,ܰMץMHbYrSh;p]e6 bOڡg9q"U~o~.3 @/o2a>l5' ~lQ2y^ : N-akkqcO@,6:(_\ve݋&MЉR.mtQ:|wUTYr z] .%r@B6\w0rGN2XxIr1[:67NW$&Mh bRP@&ymV7=XqMZ88M\ɬު=(b^o\8҆ ۾65KR|@/o3d]nOWw)V\i+v^>8&/3^LmF]ATPo@B.nBV*]_|1Rq9^[rd֞A%K|"@,K/ yTܹ*jF*uׇ-Z|a3 K J~ڦLFA4#ԃ|j_e{iһGQfb_#:<H#u! 7Kf:,N$Ł @,oeD2mMsx#95C/59*]C{ͮA ZfP߿&i^lS^qEːK+y+Who ,aɓf08 _!nw]F;=Ymŋs_V`x0Ӱs$ѷ'[$/+!w}f"x{2:hZWz7۠S& Z4ppLl~!ڪ%Gj{la_Bۍɽ?*;Z wvzZtTl #Ξwoૢ#-aUO5= q@ZE`)túzgf*Orق>!lnp ^8&&eB/~=xOnߣ{ovwv- ]>w_%{Z e޷Ѥ{蒍Уgb#/ܓ$wHp w>I.#][ k ;J^RxtƓeg!7QF,t]h0+7`Pp0\a[b |ePOӓ;d664l kpH[kHbN@Bj~SF9VL6EׂW Tc! jFJ;p~ fO}),LJ]-NPr-ªOCp䣌'gxnyc}=Uv}<^%J۽:aOVZtnb=ڢZ7Q2u{۔EuXzוxfr~d"tՖ fϟ> uBGEHB?vhiK}{cMt:,cFm$b"y rӥ {E5Dbcv{ۀߤ~Bk;H{n^q#k߶8w9*5h EGE_ݰcw~$4w*Mg/w&56uh]Qi^' Ŀr_AR/z4$m"Աyc>vΒ۫Yo ;g3)-w) :zFpnx\.&>#粡A]!*x\5?7of<+J^G!!:NK YڳbP~~_UumX(8>(,/4"10[8UNݥ}w$\LJw=oz&Ӭ[ݠ#|ZX%n>p^Ii)7;8yD&5Cys>_"~^_'qz=oy ٯH v|y̺ſF"2qj7;]m0Sd͹9JG=D;k#֬8Ad`09Z(?qމf2Jsexs926,8u@iqku(LIݾ6ilTfH'IS&~h†ʄTf[we^k<+T/,EzQŷ9⬂wSs[Ah)^>m菨*bݙIg}S4fP=g챡m4~ɩ*"2Fl2N%Mm>cZU'=Y:j{Mm=,=Q~Jzsןsƾ%ԼM2?5nRy#`XdLMuZGa zĮQ5/&|{R9l//2Erceo{-tsAo?Gd,˼!ﴨ~hR(y>526WI/NdcuivGfk`}h;/囝'`zfPf\>9g71{`_A]?0oqkGAvmDmz{;1l<}ߕWٴl[B?ÿ':#6 ,[S wS^1>G4d/\Z،Mj,1ϧi29~{k9 }~wwl99Ϸ&Ɠ޲Z *Uֶ};<>erپr!Y ů_.pR7ޱyoQ>B .imi&,)i=D"sb \.OMGIfJNKDUi[mroHYe'*ĒnŅXd ! M֢jYD’kh:|꘾C3?DѢX*nEg"V+P_a^yXI(ƀ֯Ej":WSZ{ki?9ʛ߬Yv:;ΖŻrp|۞S* 1Xc t{*IP[0QyzD?!46@#S}OTJejԃQ5y)a3=}Flv%-=% $:FwCz_r mQI=ówa_=t3lcQ/đUڷw4\z@Z)0rtwNɩ*(b#'º6v+UBy@Hn4<>;)'ql0 ň8P?d:Dvj."{# 5ZWmPIwat^JL9DDߔ>'{8[ϋgh^~GoBw2U|8` qa,؄H-9{~^՛计]2+""+"Ek9)5$V:V.?~fUEV_kQZ^ea2ѶUEnr+Cr6U_Ðhr@e&h֥Mki_i+_,d]l{0 q{Q)CV{B@\`YKfc7o>45' ' I:IM'T{Nt{4S+*3!Pn^3Җk1"x+Т_iټVTW4+DB [f|0&Kj^[+YO c+3m6esEi{OvH#cZ͜ѥ.\F;S#,Jnh߂[|q- v_}>ʉ`s2C]yeH1.3hī'M}/<6\ApfFۭĥEF|&[`k6*4DBfˡxf$]WWıs/2}^[ n,L~}hy{VA-tH,ҝX6 kxN.kݡFNh n֡ze,wX6 H_IA3ݱP2ۇ!z Tj)Z+?)&8-8 Ih/*x?+08 ]kdmzQdg#{ZP@/$^%Z+:-jZ~ꙸ@!џǁ.4^;yY,Aao^WnB+ˢM̦#Aݦ֢^zWEs?((x"dhMrYg&,*"+Q;aQu.˝,]߹;0nTVn} |d|n2(LBpMTYpcz KۭVDϘY w|x; M|Jf}ym4/:²u;wOvfOZw1q'XbB,Ӧ̵?sMԬ o'!, ݧ-&og;{eBWab:J.馲N/B>kͅ"LwupT &J!DF,K&2 kFW˽8El~1ho Jw9tK՛C _6QcٵclMdTBыșy;}p4a7n0[1W?JʢVhEz=nm5$?!]~C><㓳D#ޙcHYj}v1/&Nˉ@\PV$`0(dWT(?#0sn_Ocvj;C[vgz490EWP!h9‹|)?Kx5x,̈́ua"ٖ{)H8_\tdoR9t$Z"#Q(RȦ`  AU)A"M)$x^@wx [A|X="XDz "[9n)C JO1qf=Qw`L:zgݥѦIBKq5)OBocxE' k\qh i0T؉\@Rk2gr 69sDuwTaI 3L4>):҉"Ց+C)>h!,YZ`,⌖>CEX)Do ǓP#K@2kӿɂ,gغN=rRkW´[M(DZAޣ W%?@+wQ*7M~[_V&kd y]tԮ%\tɹ>jA#N՘=I }6)gb(2nY;Rw"L]C PaB@/.0ϟk\YzHMa=EdWdX~NӽwGᰌElQ[z+;wlB+M "WEd5A>ܖmXxGBHSls 4 ܥ FPXh3{UE)e O؁:*`%NfFb"18 mocTVu LwwL"Z:wQ3B,֡{#6.LLnT INLYJB>hi‚nW D0e#J %1SO3-o\DtњSLzbPxhvhJ z "E^KU H=Q"]SA LA lEp5Nځm2>8krCyL;9E,&xcf*.nHHP|xHRI)`0N ?hhǔI#H(J(HPmPCoϞ鐯WeٛC{C_Eqa4;;ěyKS0iTYig:.)șP]%qݛR׆Xܛg UeAԢ)66 vOJ|0nY'$BAp}aOH [\+AY0}QX7]Gq+㊎KIӛ! 5jd 'KPZ [үFf?(b15zR*ԡ!d[!Li@,zI3UAp!* , /ExE{tlbVQɨ&PP<6}0$Yjk_DI~<4'.<'s=)Y BIM5j_M<)YCKZK`O<4NT[֔INJΡJ iL@t]gf8Grb YA T2QJJJ~cDBh>cƺڎr?s=~Ξac]ʭ^D(o& R>fJBm I?SZ U H:ڡA"?]QH E9d噁 Ikĵl+EfI2(עr +Ef#ŊVaV#"Dמb(>8@~ i _J~G^yDd PS7UiqhH'[*lVs(jIC>-%jDXR't|ttf`K8suJyKSGǴUdrH לǠ~56B@R is"{vʃ21R0x#M8@. 7щ2raZ"WD],y|?]; ĻeT**k5A )\sldC'ڨMuM"'dT YCjW\9.!`/[!i3^\dP|/V9w-D9%g$8(bSzh٢Q:[~U|)Uaab䈝F(yTL˂ 0r3z&rw~=H=h8h'ix wN<<.ZtPc hHK2bg"~E|t7Tڕ! (JKukNt#<t%H;'gӌa.\ޔ4<+q*,ccG)^;m\Q4zU) U,kjX+dXH0 f-@"f&pEk)Ee1iAV8g亮c Xa#:_B] oL_+_mM``O9ʉ"P"%FR>ܿ>V+.`:^+"Z+GkS#F+/ EdnY<$ (&HG [9^ql[}2Nf<wZWr7c^~^ h"I pN!PLBU]R w*x 9xn;B)"P nM]4D9A]"FAHXENՈ,gʛP[P"%Fwb/r"qLwsb-g4PX5S4T"DnrǴBY * PV9s R5MW_Z1!?lU8Ryi2Rw?`i+Ub DA(Pv}ڥNmx NO/#m?ُy8ο. .AoIxDžCO^hʚt%rbR]kfRk7sZ>#37R"l#.`F#UWNsA&g%'` teo<1lF<&KǠ)զ2B Am 1OALr+%9E8J(25)% uq6_;b-I DJgG+̡J 9y |:(g`9 a)aOEyzo yDQF 0RnGԄIeB<"]4( aE]Q]aF`mқ~Px~+{e觥 p-%f<ޓb ]6;57]v 8X Ԋ C% )(z2s+""r+ REaL!+` _@[C9aQّ[r+tEmYXEpx!w^/G0>sܹfȠ}3>)C),..Zd<kI@uQ QT}`ʭܠ𞱈?MAH3(/UʎAzB❊7{ @$ā)9%'Pv^gy"ӵ7Uw2Ԅ: (1W+ fʹ7Hמ,k'n'2$pIהOd&*gFB);ӡfCe' oY Ɩ΢[6:ͮ!F&KO+N5&R[J(e7o/u|Zܯw #Bm02G OЙ" #'` *)v' i5)׈8LP @r?ҼE w36ELy~㻝ak8Qþn#<+ٌ@6(lLYS]T]v =-z"ƨA+) 3⩇"@-H==i_'bJBNۙkF6WEfY?B+碵^VH$V}*y)rri?z+wE{V@(|]uv뀹mh6`syT wrrpƔs["?e0DL YOey 8+/)d1Tr%9IM2i#BMɇU:9=Es)#%g LHOn`׈c+aukdjrXlFr6+;*u y2` &L1eeo+kKFP?9{ o= :hl#@).8ڊSe*N!,x(K"TCG;1qS,Uh 2zSOTs[P/zLȃflAV{8իa6qq}b|,P=ίqcF&fBܤh>H5Z =jX_ޱ~V# 6]{ 27:Jk*b4D젬լҔBĆGEBx?,x㍭Nkc$ 4| FGYlBOMPܥܢx*,}d5T!,3T5Vn=&Z{MCxQ)* J!@*5˔t2۱yd' 'UD(lې:[3.- 7BX+ѣAM J"EUo^"4p۽`9 +>jCVV}+uCy㥟\[ zϷ< Hps!a/vOLC?+ID)"vTS.Un`9.v M5|}濔9X96NA!`r0'H{6{X^n?bBXcdЁ IB, z('_"syM/&PV"<|!2+s]+$u1ByPDŽ۾0|B''P~0 @~0&Bt+h8S!8a7cl/; }fF d }@@vV@;4)3&DBanv@:O-l{PbZx }΀.C|S:+2YcuEBifB !|ZX-a ̯S ˜( 0R WtBi0ac t'M @f=x{RSď=)W)ff@A ˜~@T G$Q?Y)َ$p ӗ]o:vCT%\|<*#[;:рrꦜ-ܨf{BBolddRaO Dy[1RlE!_pM cHs0YY$pZ:8pq Sہ` ( ~ @y؍8J6 lxO>y{]F]ߟ.}7WZW".ɇ?&28( ,;.6J98D@=(ޑ[[#o&I\%UNC tK JBd!tB!/U' /K*g Đ_V/ֿmB?4 A.p+i ` Cj: !5F#f}ٜa0Gu7躲I?LELY0iO<}ZBKUYAYS!mt?\ fyHVfӀpYNJ-`w&PR*Ed|(?OW2V?? tNm0I uxE&]7! Z@T.F mגdKrЉMmk=a_#;$ }PPYY pN]: Xm`Cɋ'[ Oⵙ »:uHS' `<.F vzpN¡5vco#2y |pN|DWƢ-{PqaL Z htu7k܄nƠj m|NU|Vr:}H^@CA@oL p99 zvHCX-"ޠf%`q(P4_͔LA߻1| gGc-?CdF|"QVܿuf~f @%*F H(pXqJiT&<QԛcVJ a2@.:e%7AD,@C X[zo( @+"+оI^ 4VuUQY}Ȯ u'<"+tV܊^tȬŚ+\b+:+ꢴ"WwbW ڢpVg d+m^D (\ \̚hRn0—vM'I ]+NOE _fKB1"[3p3MiА7SY69] Bpk*&h6~D]E[)˜>p_ }Dxplb31Isұ~cdR`Z$0Uy~4+,Dc3luMC;%k <8s4 b+k3 WOV¾"7hHgHxQ^^Óaݍ,+i " ?C[or`)ẍ;9㪕ixҖmj>(*NP A2 ?b@ӿA()9PwtYJ2n5xeA@C(6iv@C ї7/4WH+p|9r?@AJ"sX?]C?D P6ߺ ANUAݼ@ޕifO'Q%0C+dW=kᏧ,aAREwF MŤp0ӃGTVWEf]}C"tW(h]j҂o;M?tX& gWuO!ݞKi@>Kj7TKHIyA: 5|[^:ʩiYEE/HM`pe5.5Fҥz|nь¿r'܀'&@adiB23)e)G]H<0%Z'fLkreH~o7ԇ6zԉBu2Xyiؗ6y}GGWq$r8\Rn6B6g#3nlaY{J] i{H]wցkAU;IJCǕ#Fs=y:-zhT(tk.8zEAܭ C zEWEy-+nӌ06qW?4lӕ6E/Jڵ]6dfm׸ >.L4dZp}XLs8=Z ǜtmv7bGG_Q6laF,o&ۈÅD|eeDXJp`@$MjZӸu|{Rv=WSQX^Yw/Y'soL Sp4%aS'!PB(MҔ|UZL K<21X+Gi0])ڞL7vy@{dKJQJQx!P?U#E, llVq5Fb$:ߊ\"C?>fj)](彿Mo=עTw䬯b+LE~dWۜQܭm:m.9a4#Ԩ, ToH˧6SbetIHڇg&YDzC|p gyBW.ߴTܸ#SȍJo)K[ܕmH$Ks^5q}lNpq- ρ{t߽2ȟk[dNkCZG9y%/.J;-BW?S27v UXmO ?0iz~V{ |ȉNo(!lX9Uί^?`;>Pwa DDjW?xK8xO~MEڤ yd|53t^$wq}|<%MDRLM#d;?ҵp`uN.MʕTsZ'ylz4tsn~N5[.Fh[L+]pum@@ oN8BEDn:y/^OWoJ\ 5UDv*ҮH_wS[D)e:kPA{m){fSPy™F %`VABRrViu~sU񒖳9$lY*U]'_o*kHtGa|a&lh\a[LkLo dUtPh{Qk/bw}q{M(.>*ȺX} {D qOǾyYGFGCQLgBlwNۺ_+~pڨfA/m\J0}Xw3Ak<5o o{B::{SӢM#8oq z[}#J_?li(njY-GB%_<V5_f$C r=ZXa)CR]OЏsR]նA.ՙ7QN 保 69I`A1 ]r&ZIVa$jzn٭D`V_5R[?*x[\Ʉr3u86.@v MM0mw(\< ֠^*29`"W0] t~Mٸl|iWbK}bBVHIB@RAν=,_5pfxU2*{n4g2)?sk< "c3+30َTZS_,'j~zG߽<f~"CH%%w+{jߧn?s?zIز':YW9ʖJap>/zY#ջߞ1ܥwCIceh H}=?+J@~b/|rę;櫘ote=vzl2[W3B.Pǿ%3ruqRBTlT }(g5HkV E)׋'t)uh n~oPh Hgs Df{"G@ fxs6ZHszv^q`a""!A@H+'>N?5xTI.^F6oe{*': ϕg|U11hiUXNd|F1=2 2Nf߅]S~"C]Gg+-3;emK|Ѵ>' PS!lWm=r&*s?&Qϋ^r};@ީKx!^>l#as^X'ݒ`ʺԣu'\N̦{ (+o,@! c!74X!{r+lcuek6}ՑI!4pKI%&,\pư ܐL׃$[zòK}{KlnxyK9bpƙe%ߵ;5:|Kpp^Nʭ1 :} ˵:׉KFϚz[v<fY1d-ųuIsNb6ob3eJ]½>~'яkN"aiVGVIޖj-t+ηm*j}3RlaVHs腐S>n-k?]N g2 W?2%Mω$@>Mp1wxE'u{Dng0 iCQV%gH7W_jQu ;P+7 IFm"k,t#jǍ 'OUk>6*p^o޷`$}kֹvӎFE_{4u 3SmrJZߗV2F2(ꈱ(@G ՟s9qxQr~^c!L2)inb)>:<׼+!Ӭ;νpxnVXy_쁀HiǾ85v%%]u^VSeUO62ZGwC?}6>]iؿ>"*E$Ypcg&|qe WR XgY@+peS[+v`| {+HV H AVH+0+=+2+X (DR'dB ҁ@9AYVY"QlM+kd @+*Z`0VZt50Vʨ Mǜ6)z|q|^΋"V̊+:+6Yn5/܁v.KQdȮ+qC;UYf׾_Os=7OG4ٍ40vK=W-w x<<*Q{[GVVӒm~9;+jf!ΊEr0W8W>"]ׁ]Ev~fU4V|jJǺBB+{7o[+|E{V{tWEq} +|EnT F\*[4WEtTW V VX"""(EtX@W \+U"VآhY"Zn" M0+2<悰 +4ȭ"ZW譠+`VT[$VEklEs`"[+0~Eo+qEnd@+5+I"HȭDVEo>ފV>{+=+pEn}sEiX"Ed[;p"dVDWՙ,"r+XhTVdV+K32+Tj+xEtΊYZW /(Ep(SEXA\m][r+nj]-8 S~׻ݶ=.ƻ(F>Y'#G.-7GSGEX zʔ1g5| qYu_;EB);>8$wxߵ!ɡaBAPIE0I媰gI-=jgfN_ ]Vz^n]e Ɛ/]<\>w~f%G5U6㢬IZ%LZ T׊xp0l2P뽼J6!I"D05LR3 @BO359rW"4WEaQ\Z+X|gQ] b+|$Vi\>.*)b 4V+"$W:W+"\OY'/*p76{e:O& ^SgLgo){7 !\oF˞]GvoHA&<_UThЖѹ7/ B@Se`\S^N߾o a?{1ys2a dW""Y+ppVV""U$V++fYYVWȬ+ Y[(E|)QʪaBACU$Ba?N$kɫaOq\Ivٟܿ 3! GڹgsOe_$S>qs|hݱPy`[l3"z%gEuW+pp=Qp|OC_/u.樫sVXXEa"X۴p,[GnzxZzs: Q(`)@o{ڷVniؾɯUN9:{΋SEWkYa0k߂ g~ !JmOiB8R&iT Z:/^}7@CwVS͊F {@nӦ>6}L=㠶@{$W+<>vhnH ϚF~ ՠ Ӟ+@+E V+TWo jS&s9//Vo<qk dy`5DWEnEaUݝ,_!7ΐ 辌jǙ9s5tw5OQS?McuӷJHdkT rKJB1% P J|/RP )aLCh.SAu6?6 jձLeQ=V?\! 9{.fvbLx?@.]w˥dZE7 n(JOg%)~4/aVa'K# &$tRYꥥ(1偲{_0e u6ywoYh~5 =sJKK,p 熤-(Z~GX[?Q_eZ}}]GwsoCqfQOa,vրl$^nh'g1O*qX(%ˏ/,"Ùڳ԰K*{ADNx eO>>=J14E72PBA-Ѽv ;g;Yoݹ{׋S[rЈ"DWט@B*+ kb K˴(X8SYP`!A\J! U\FG^1ڛvCey2J{,#/oxɔڑÍ )6wN_!\]"+EhEfJb+Ex2oEf^VdVy矼ѲN| DOB*fuVBg6|^:хWs ֝6 !z22E}{z=-Vg7ڌb!@ofH3f9Gv=Є@= $Ѐ$ ܄U?.ʙS).0`,oF"B3Fws+ oj? YkY+{3bȮC)m]{E|WdEp/mһGs3cp¤v3V]q8bu@EX!hӐPr2n׃gURj)1G3(ޏjOݶt~"ޔ2)`TMOؚ~թ5ͽ$)-'#em_p0+}?PL}~EGWL’ GŐcD1% rHSGx B@?3$::zbbeN!u֎ܝ/#$~ㄎ5-fF֭֞3mxe@r!`1"D"ٽ^|?1J2 :.Jg^C['`^9 V cN:mv>IV)bfErH$V\T+ Wj"E|ŧWSr9)ҩw_wҺ@D1:9wc6@C"@c),cu5ETIi0Qor'YdoMa9Cb}g} ߏZE`P+ VJ+-?|m읿\Q_p[Xs%,/"45ŭ JC$U9*N2'`Ja|ov$Pqph=FɗjNV_6+]ymlx$1{n5lqvYms_W-V1(EgE|VzXEdAYZ'W}gC ] VhQ]ߖЊ" ֢"j+ZV6b+\ +W"^'m?}DVW{Q͈UTSUWm4"Q_܏sVm\ն|EsH_+ƢEyshj"]ЊåVtV+I2\PEob+IACBe1W7Z k^4qwVf펠ek|)uO6jgg%prl1G3VvњV`kCAhˬ5{G-r5ߞ\;uoEwmYUd XށYLȬ@+ bЊ v͟>]qW͖^ܑ[h@+YB+yEdg>U@p v 4¤o _׭M{kN);Wzj(_{McN87fsnKy[}L._X㵽jűY뻾r\  +l0+B+"EhEy"EgX@ tߤAǢQ@PE{VH"+TEiVa(`VWYlTR+QV+0֢̊ԝV`V@HAYJZVuE'Ei""Et HEu%$VW)""S"jh2+kY"EfElY(GXo-hSEkQ^H^-h))45]G-?o>l@X4.`Z֤8Ʒg6$~_v7NA&OFֆ?J`&B!hhT|V+麭nSmT]CKrfR gnjY`M' B XQjC,s];:+l 1]Ef^`WH퓂@kYYM>G_xo#4\q{EkY\X]?%qlt" wMDZ|6m{hP)o]Q@qC 3Uznma0t~|k*r=ru*@LV?&4Ӡ}.SpB!sD s̊VH)xV[mEp謑^ElZ_5٦EmQ\++ȭb+\A^[+ Vi[+f^iEnȮEEkV+VdV+mȭ&"VŠ"4VEsl+*VXܑZ+mg]2+Ei"m譥5+~ھV"tVݰȬ؊?ZmXznCmEa 6%#{Q/#Z]IM.@! <@! S|קq{&ݾ!zddI_޹pp(e9'(B-hc*''hdUs̲p-!RA`!(F9,QFҡ'o(!^tbFҢ[pz@nx0 <>=lhN%3ȅ^!׵4Mcc7@)nN~QF"]3ْL EThmXn&Syu/m*h˱n:?w63^Jtِ܀2 g Qf(NzYܑt`~ꏖOd|}iY >B)>]s`,:3xܵ2i "G}3}WEx%Y a B3I_ױ]k|A}ʩ$':vPaedMњdcrTDSW?pN5UJlw#YA@@*Bv3. Pq}ekBU,"x0~ {<0m滊9F"Jv[r?}+WE~4W_"zȬW +QQ][.΢+8+|tފEbXcEdV[VފЊ^eDWEkX[@VQ"+MljD "ZY[+b5*+VHWe㤊X]El_"ElhDVeVY"Eti"fHȭAZB+ߺWҡ:z+mabӠީ1{`><+]/\shMvϒ!9~]5̘1X[hby*KS+™~!tH:@]&Ggݘ)@\˹b#ʬ~)qp[f~S}{5r~@O@0eLX-?sͶ|NlwXl&}'ǞRknqVRdVYM V 0+"+Ґ ")>at6}'Kǩܿ/qdWr]+w,w|DDNHJ8pݒ SBѱ++rKEѨO}Z_`.O9$ԌR@=n.7$xeߨyvQT/VgBiy X Wc-5Brrr{;N=#7z߀6V0\Ex$WoE?y^E Em[T譽Y_/"'Y"*Q\ +N+Wo ww?Eo_G=4.E&&?2#Ss[{ǓUlutyBsq-۽zIZِSuSEź>Um2`qq,f@yw'c?Sv|pixHSFt\-/1OnACWIuTZ$Ye*ˢ aVGQ) 4 B I&,BO2K= q[x#EahT><>"yAmӅ/R"BMk3'{%Gz<`r\_K>[*0w\3_Ψa<~z$Q*f (rզ+9Ɵ%GܷOd+B+! !.^$,o&>uNN0^v~a^ >Tl,)T&9gآf=Nn8ÎW~8 %XYTG>=QWTW{Y,^yg4'hTF#/m݀8]fJŚL}iilY2(1[ :^b.̲X+M+EaݹtWE{V_"Wyx0V"TSR+_#N) {EyMJ+ע+V"B+[+WEl5b+\5qu+ExWk\4WEiV"Q[DVWřK VaVH+ވEyV u+V抭!%!$~i sQrZ%?N~uZ/NHVyc}©Q48?2w7ӡQN*{2ط7BfY}e2.0!>j&/nz o@]\ILSQw~ Sʿ͌ 4C 1DO g=|z y0fU#T b,գT >ހqzRW݁6 >m;_ɿ~!djٖzqcKy>-zmq KO6Ք$I̬-ʟ%5c/O#E-#۶:+9Y^>noƎx_-iDt\7oAք҉HS˗)CO$QP 9r(EZ֪),д)(AB#U?k%(Wq$ڌOb.:-n6h~T)jQwrWfꣷ2q+Qu39L^xWϲEnT#XEvY:ox2h"|O_`odExWcEloUEf7853`{~ ˄۽:*`[C3mŠ[ '"#XI#}t\+rƤͧ{FOwUx;j3-#]~>!0M:Z&qRD2 - `NI `Oa^lHlLum'8GS7tko Efvn钀M 9V{$8•"SXBљT[PEvBnu~K8>GL'4|tEQ<쭾yȌDit.)*nVj/ϏEzOc#mJ&&E~tWkEfEbҢL +ExWEvq(5Zf?%eQ_jnJ t'ONgu'p}$^"U#T)$;)T;H"鼛nWE'l3> FOcu 1┃"SeBt#Yyia0='dFZiqB vk bjWxó0B医s/c&{Y{BHjT( 8aA׵XH wvY{G| +ՎzC~tkQve6.AX4?CIo|\$Kv:2}&?܎Ś[N2 } ll]a&DvrES:q9#8s{(%0e;B9"Xj&l;xōDJ4mSSVg^_ϯ蚓OFi:]WP{9OK}Ғ7hWj~{<(J닥U) P WD7'xL^6l22 ?)p8=ۅ-=x]?Qxq8KO<,bAE2tF?%d~h.BS!`5Hև a@*G_Rs~śrqS~b`e]ͤM?wWX.3(c~3IBs7WX-w<98zrAFR]jgҐOo檮f fSKWDo0"65m*cªԿkR-jkj ?[ﭿ`x^^mMN%&F ޥ mrIfyCӶ x=pU㨍i9Pz9:P8Ns6HnJ̤ ܗ$gLOu7I290du+DwZ=˧vn"lB]̞vr-tOݘGp<%۸~HB_ 2’ߟ>l24 qdU !6}~c X N+cֳS=Pc>P8A3"0bwy.Kͽ!6CJ~1T!Vn=okgl&'J]/ Si V@QSH7Y)c*P<XچI4P*ICJ%Tb94 3UnTgK>_ uD_@ M`4s0rT2ш$5s`]N׻n 5Йn h iADQ(6@RrA1  S9M wxSHG@R%(-BiL329HrQPxr78t`N@-D kzɒFT %\.v6V쥜6\5`& PD Y6Q)Aα>M}yźҒF)?mx#X+}5O?teu29쮗W֔(XiV2m=ΝbolB?W(}[=~5n|봶i<;ޑ}Rv4)D܏Q]=Hb%)@VJ3JAJҀ)F%J% !$B`QQ J"R P<eP|oGN "i e (x'Myq (c xT B5yPc!E d*` jBR _{XQ MD@(:Ut"$B7 3'!n:(>nPo"|*!7<ޤ\':o0 <<+@󏐂D0@Bn@"H Q4p (%(!P(b k 8Q" *< agI@/ 6}ZS=:d9nH*@6Ј\`h,Q *@Z·fB " urMR1 J3ܠ!0V(W7$O|+~Rh  ti |ք:Hc+`pG @>JaKE,a?*QJ!CvxB (Tc8B@!%gBZQ B)0c:g(E+ab<(la>zZ J!xEe(M*F HB B16E&%c*Q DCM$`>oEp{ǥ__(((Fh >X : 9q(@(6C~1?M9D1E"S㽝Os [և\ Ρ-@By[3;"P1 Nt!(r9$x6C+>Ms_2ɝQ2j|?wdwuLkfRxT'uYW0s/*%z;-B7dvmE}3p|K|;^k JzJ|}%RO؃p2otӨEẅ́M qheтT3CJ_MΙN hD#\}p,%옷^~~[w+, DU'gE~MLР wTU͏Xn&\ȓ%SwGg:;Bs c<aD1>\&6GD0E B&{6j\l>JP⚶c-Cm[ǥ&wQpaJL\jbӖ.g1byQLJ=oCι {[F'rOGS ݯfP=1ޟq+?v$h(3TYѳX/Ir'.I?_k &f{\^A'w(۪Lvm8fy0\:ĊYkʆn "GUeQ~4BG{GSu"HtmM$fmޣ\5n'ت#})kW 13ޞKJc毺M*`K׼o,8~G-d,}#Tq4fgókl0^ |qNC%%nތÎk @Ҵ:|VһJFcsʘ)81F %$g#$ jP,TByz✛p{k{U|H>+;_). cSY>?ҿWVtF> DGygn H7.y[',\v<.9 ->x?ibdc!TgT SɉoHUFh*WD0RߊBIȻmJ)v7usCÏgbEڷzg#.~߄my:΋5&/źkAQZcT xW2NMo!B`V=呓-27h`mSBIB'm/4{vN h}-fEm[w6G`>O;GP4:zfʎGۺ;ES|GY/8,M± =dq7xX5jNsZGXP R?;=a}X߼)f"&SvP q[` 1H6:rn{|d!vC\eSGvZ\[8pվ"$ JsC_Mj3i~T&% pW2U!g!0#[>g3GfqGg[ ?Ά-vr46 l^K?XsL wk%D%/w*v2m'yo p)i_^G}]wf<H+፞'R&y`(kRF``#LR#t ?̈K9RZc Mt*ɺl6G@'q}R]ꙉbqzsz_ ~! 2l!޿k(Jhb""'1Ɵ^z<þ,׀ S è-] ϺQKTD&,5ºUmX 5$x~ˌrz37s݊zC/rPYgxzM[^Sdm @6?}@6J(JA֬rkYh7]-/x~TxN:Wjii3i?aۯn1^oL/KXd>ZPuQ,Ab-zGwge^a"dk֫W_bEw1@A޶Ы >U O'#Pj@1F8CTpK ptHЏ/b;cR2:H1"]˖}:$:(o_6E1g'w5r>Bk@BcFױ0_?bM’NyAUeq*+V )cgG,Gu=S`yl7pgL|7Ronf}ɿ@+$@7c xߪ'g77YkPVq sG$A;͖9x4yV1*3LVL#6"!V&"@ǔR:B0x+>S75UԎ(BmwcH`Q QpPy,ㄌ`j8n,ķuM-.6e-^_F=D(`cTeXp9XPW ^T~aS,"X@dꇜS0$k&,pH/I.-S3*t dsl:ة\,dV\MB,27]Lld1IW0NK:GnћEi'q/3Rcg)+] D (ߤZ<IOr~a~+ΦCYr@hwTUl.D> ̰ EDohl1$yܙ]UP6fsΈFdϳ/w 1c+akg8} |#;'`$/,QΧ7F=#h̋=Jp $Rq Ι@r8i辳$`uC'!,,kI"UGww3p: jx>[)Ý{{%wqH)9C 푵m0dLr7ZN'd.KfGGF4m[.;J qNV (FQ%E|AcП?-['a? 'Zd/׏sw f7Qǎľ [:p;Q>wv|rd7zC "[wjܸJS Dw 4oSObH ?[ u>g ZuݷKoiouB's] ,~ 3BFpb Gq5edsc9v Qr;o{;ƫ##ač6B<:<~*΄ڃ \l|y̡,)(GFS3%_޼Ԇ[2P!C"=0ҥƭO4/s2@ضm\暝+kUz܌K-Mhjl# '-Xּ8*r e'3}4K!ؗ_P5/&}ZO^ܳڎG#'෬ޱ*į5z TUQG]Yvq&5M"]Y%_ib+P#"V+TSt * IA`7r%Y5g]~lZC 3pZkOMd3i/oJEt*YwVwv _R[{;=4}7YHTnq*JO_c iڛAm.r UJsތn?+ Udb3""\-8v}ҖPI;awOIMx)HLnE<ԋF OE^i>w{3 ؄#Ȕ $QE#o+Ӿ̍COgZfIW]x^ZejQYͣmw=t;rw(JV+rڏ-S~d&߳aWsUPF! ک8 `;xPVSSO%]iP/hyNC߻^#`r/RV7KF74;MZ`Gf˿\Gn18WgrHcF-GN-ν.K-^Vd]KXFW`z^ d7D-;c.DB];/)C̛ټD` }h+z/"j޳꛻t^:ze .rbʘ~_@Q1s'Ybd97,֧ӹ;"cD%!ijtj0BAe=&c x_)&eszw| V'n&MOySb0Vp6EC [" Gn«oKRAδD|ΕJ=,V"sMa;)seYRݼ3TTr^U`ww$KA8T?Ɯٶ|,$#xp5T 6FC;G$DHJjӎ(B\ӢIL#*HHQ9ٽ} eR|G(튗TaF $TTxݔܺP)7ڤv34az md1'!hQ6E)"Ls=6W DG~ l"2ځܳthVIȍ'*sv)1[ي*L|3 Q .o7UCZ:XJE30|9y0W* T䑦p%Dl*ozmL "ȭn˧ Nqءv=OEU|ìe͘ZN9G׬N6NJN.q+PuI(%7PQtSHT2#H LK/]X} ^ox ]Yyլ L 2oiشeTqh%CyN Z'?C$W*M!59yqȬjL(S?a@]PŖ2l'slcAw}Dm>*s\;uwqDjpgJ􎲇̷bCC^ DH/Zi6rawF$U#P&>8u0?[o$[)`z))Hކ`CB9'4 Xo;CIoPw=Qm͛!JlO3d@)_r |P-uH"D#ZX|(SOSo N._T\0u5x0*ʢ 2[z)Lͦ#d^u\BqpF" n ~}RNHpj k1fBNJ?Y@vA/,5-E+4g"#ɑYTRD6v^ +[4I'h)!FN,| a/.wΨ K'iy@GiO^ӧk8%,Z A;nـq5@T F:^Nk[ ǩةN0nuԞfQZY۵y/ղ<7w$ݠ?UD vfR |#F|η_L(h.N˝ڕ7Iq ?JZchSu* UDKc1URGe H_?sOzQ֮"]Az^Əÿm4e$19qe⏐IK%axy ÊXw_[<9ݹ{K-[2zܓck8u(zBc# KL֟u"]٫NQa0*a5 ]e4H]aDyJ)qp96@i.'{u%>%ŔΔVxpXJ.%EGd^C9kV%74{ߚ, x-朅h1r [)II`%@D%v1>jfanxKUtF[\S)](v'D\{~U٘6qFcRbT~ aʀhoƵ4 R=7/2.&T;#g MUB%#L6*9{"I,A2oۥJC'*}?jD]0I&,7zefSkSoC~g0wp .~( (=FZ}eu~߭QMbwS]iA#9`rt8bJTH0k7D "ly۞z+j#P8vШ6 s]R/j$:FK[ƾbjmO Aq`8 kXu"05Re^ \:+vD7VBpC~>p~ T}ʄu_nQ>phepnJv-bw.{S&x?[TfL9cdؗJK=-z?w3I zDz]cgbwdvsW)JqR12@OB$l6W]hwf&;5$pqv'HB瑉mp(Z>c7 w޴jDt6h(yF?R>4W;\$iXsR7^&"poja-9I@D+ó?W^dc~l\U8ǂXxE09ѓ v{"JHt Y*;\Jqy@!0!=V9㰏Gf!{ig/OsR& fzR^gq0]S0YB[ܱCГq.]30Ϭ z0@l-6Exp_[HwF[7ﳕvBorqϨ >H1}~Tf&]AX d|' K xC;c詳ٽloCU1p%MvQ U^ID?^_>j[8R scFLŬIoIr)TXl|hO|13Y˺ZdJ^U`(se;{#=szn&L0rk9)o途":/?Ƭ5_A1~Pϱt $I 1ACCBD D @#YĠexW~j:z'{s8k#vYaը{?NƬ91y.̐a0,v]D\Au7|5 DŽJHDO\|Qg*`AIM $I BH( /o;&5y iڤ4CY1dW(H(_WQxJm _v˙Ӭ+3}{oWT~(o HfLčYP=˝7Vذc镏ꏲVp-ckP劐|;"X:DCSy fLb$ N`?AەOgcM-: 5 :xh`GTodONЌR:pN#Sד{Ҕmb3Ui׮R1Yr=Fڣ;1ϻ=m?` "׋QV% fr:JJElz'hV*0bXQ!XPH()hV44H\M25Kb8ZlBp#siu0*ctx>iSjpCƦ6t/%ԣ@+4jTy;Ta\>'2|F'-&:'yxStksƞh!T !25>o1o~ǯkNy}z6d$fXJU[ X,Z{iu!?ݔ lx61ڵxED_vʑi䡭jc _oò#[ךYGeKrKUQ@h!JHirs$u\ӥ*K!ޞ͊ |now0=|k:[`r An/S+ F {Z36=vu*},Bg_ 2D A"'Nd+RM򉵺$m>[<=1+^j >kDڜF5C617KdbR/u'o٬nl6g ؒJw{!}f0E:ڕ="H!|mzunD& (BBJTURBpWHNێ,< .Ţ[ZZ M!e[ ɶ<`[y%(S[_֍UL4@lgwǎ Nʰ+>fĆ@ %w%O2ر!n^x~OdmIJZ֎]^4G+BbrYꍯ!Ү$ܞ$>t ~\OVǬG֣h)3֝uę'IYr@/ۼT1}I֯:;4nqFepAQQ[wov%to8ς3I(&ؐ;h g/@%FP4 "bb1{FLp-TkOȗ-!Fq '[CGuKZQ;\򧥩hAۄy( ^K=ЗPqiJjcPj<1 B0Ww@SeV2%q[;+?N̑n82i(Z@Aחϥ:? ~2 :^Y |r*8=^ZӠDKSUevY;S#6ϚD6^"zI Ұ D ";en^6(G(n[@A<wAk<#[ewx3J߿V9Ie2+?ZoN@]}!Ce,rKMEykd\TJ$Üٺ{oZnP?+bD7Jwn٨jò >&m7n:I?y #?d$kxw(nuY73_;cĢA~' Ʈ(Pk.CC^%R[v8(a9 `U! !&:tc~g>oTflFGԨyL{knY4O5i:VT)o*(f!fqY5-X&B(~r-MOu+ް^VTVor {94w>F  H bR B A "! B!7٦;I b!H!" k!-Ч$.Z"3 `Z@y7^+~;;7I:n$8 _V |)~{4; jv@':, j}qW,}˿_%زʀA; vFzթ7S?TYU5Y~:Z"$&ș. $.dC@,,ߙOӧݱt/CDjPj͎)~}GZvS>~NeR@WF.[|D  DgӒ68,ۍ*@G!7 7`@$4m @/iQ-/o!9=5/Ae*ޞIIN?6Uّ͌J]~eC?S10 #|֦RbןeP.RpوuY7I\8WStYE_y C~mOcS>i*t 9b'2̀tql*uNS<² P<ٹ Thl)1;X4-9RϋbMj'wmrဟO=T'>@'`@-,Y7@wc+5migo0OrE"6ԣC55Nyնֻ˧=8Z!iHg\X +Jl?@'d)M@gsM$t)sūCp5LZ rNvz3&X% s$NIAOV9GgX@vmlJj]ff"wW׼3ԡO-Ey469 JM2C#e[r{ߨڒҥ_'=_d@1%]zmP>\ݴ2)$$Yf,K+~ ӑL>۸,/g#NqHe@GJq:}Ӽ"8aQ-z<d&1䚳`sq yv 8e<&л},HH%  0mZ$KmKxY[ە!mU1f7t q jOOvVv?!>aq`Žt8xI&-\?Z2hՄ],ِ%Fz*oD)iki9#G3:\F35Ix]A0rۮE, !+t  F_Ap^&01T15o#]†8Ũ8N(#\ 6V\%Dp@~[D<|pKhT I?`@w9t Q9LzdsRkUzB"-,|^K(f-~Sޣ\Vj<{^(Q婯rF(>w;, nqx 2 eK;eأ"& "-;3:-+ @taRdGUag(b;4~qt & $ jyY0Q(:>&ɧs%6mֱQOښ$ "3kpM㗯;3YM2y l1Ǿ _,$ ?Ogp#9Nx=KF04R+7W 3 R!ŧbX99'ޕ=!+^@f~XWmI߾_@ոdh /MlktO0yMS)X/c>q'a8+ob' ~1ǜ3M ,NۉQפA !r!"6?_Qqz:{@Af=6coKf SNXZ^=&?kuL2s[om{͗x@41$ RA bq}/d[|m>/7ۍozW'Yv #;erA43Vh:S׆fys&^Yб4q7b6 i2+<3K͉磬qKGJZ8K,\=mw->?6,Ĵ>ojR\Xzew7{1_B,^}<:Wجy$MAV8nyY0|D2=49~id ϱoH7dHn7BDXv:`=Nϊ DM@$IwxaQ(|lyx-B7&OWO~[nxiiuͤn#%c~Z DuQ5 ,Uvsrn#Լ+r=8'8nK_ubP1HBz NoZo>U-A^^, Dwh>(ș&=0#/+EB-1 ڀ`{DK90)]rI! T(:$0FݿC7*&G,QXD@@ »KAefS>ZHC%tktfAʛ"!?(G(oT+$w~9Ɂ,}/jjr>>e{vX}zez n2y9QrXr(2UJVTGoyy̔!Fءn/DXsikUpZ?y Gm eptFl3)[rR!'ZJRc* I ($(WߥPZ~64/R`?!w sƘ{7¼sw}%sݨ%4G܀+Ñx.n_u'6dhiT!X>V%{#^#bb1 ;GȬ0-Ġ͊#Ee$/SN'L¹ao)ջ:X+HVw-u*E$/@{gc:G<="="G>o_wY#Mݮu}M=l|q,X |sO ͇P[~%~j|rjPǗ1ha9%I?q8̛Ul?'w]Sp6} c Nr>D) ]gohR9Da:,s{d c|j6enca7Xk/ֽyU=T<o:o9j'ixtP0V(PQX(0H*< V_ssv+ls-]Rʁ21ߒ@f >Yʚĸ|/T}BS]8l@9815-0}'gfzB{roX H`=#2<|?gM鬅:z]T:gR7k{ o IDzmC=Ф Ϻ&$1a *̭~֖D}\([Ap0hQkU2b)R0WC>U#jTzlM'`wq40~ܝl[{3ճԨ|U"VhhD @  O XhVx.JB:bҳB 5g:e0~;oJwݴċ>IfDYrcY Ӂ]^2䫝~h p7  ]/;JP$Aud>%q#Z59oeCK ''Lw8{#0Υ ִG:HH>v\=$$i&.R(D+"Y  iZ}lNb<ϫC9us3tZ/d 2!ct4.ж`\ɀ.e=s39"~(bD wGÙ>^gBq?bs6W|Σ=tAcE5u%2 N2Bcdy8F|j7olɺ=[l`ޠV/l?(ܔnǗIih@H"[Dž;4ض+S/1~_V͵"aJ2(4hcj+&9վEEBB9%-gc kiPݳBkc%oHW}k4IyXP"&j#;ԩWnT;TuI=8)s$`WVjC|SB:IT18FE0>lPQ`Qat9^&/2lqđTqrۤsD35voQ[ o&͖gUndHǣ]jۏ+h_LCy-vey,pwǐ0m–&A:H//5n<%Lmj'ɤzp^Q<5fTl=Z#m_^eT]րz$WF+//ƕtjcxlT$RJ}+RjZ%O|~Zg;cV[RTW jB17W.q^|G@; |/bdxYo]!zxIML%h O1Xోvxm)o>\,}7S5ts6 iT?$KyR'߸ȉbm9@ϩbrziEGb֕r>4 !B؎'I٥LIQlnĴ33MdWK1cHB16c~ pv7- HC|OTMwOd0ȘMi)ܢ>ewݑmjj,"A<3塮Z 2j Д;v) g|cfx{ͧ+)be){[/}}:?.Lc\y ݇N]uA hA)!tc1 ;GڣaQwxX.Z՛ɥ0R:qRA$ru" >Ί쎧HGUPM0҉P 憸H,K&1VZ/H6C)+Zo []DiˬǕh/@晶Ĕȱ3 1&d:caDdu9 +߫eɽ|:#%˴3v)b QFB_6Ӣ D7Y'ھgv-C9?| Kkj2P#tp mj*77]8yVrE)\8翾[5?3ٕNk4\]@B*tه6*91Jg׶Hpz9Z LOmGvYeH DIH&Łiןz"o47*ؔY\̿-O8&ޱۜ`aU]^T/[)^Pgz) h<2RW)>O[ Ǥl;mMoƋg%w81Q %Ju[8]cj,Y]mjW8Tͮޒؐg~]C2:QrJ_!@OqG<+a$Lbߜ2C 敖|s%EbpY1 2N2ҺR+ľL\PQwksf:~'>HyHp'鼣XD>/~0)ʫv&ϭ+7aX1_k;ƦwԤ"5 mhX_Ѝ*CzMV<%ױ'U>]x$OuĞYGTAUan'81^1cx~0T˫팜1^4XNW)j!ŷg(]%rwŷSRYOGG }K*==#VvzsY-g GZ՛9ԮJlolmBd\_zf*M$Rgc .Rtݑ~_3NUfO# ݆Rø5 lah(3w+hʙBľlxj]VEGO׉[@#Kn;U`J!TUAC%?Hf?oA+^l i1jCh`ue2 xE1@.g9Md6Lv ѥIEָTeIA"϶RQ30ЁEgZ6WjHnG%c !vٹO6"!(U)/2v\4yR-{;_vKu.lw`գX ֖eQ/Y9lG(^%U?]7SQS_C&9~ƧUvK9`%KΪ%K]GpSlHjM]̭~%ELTkgmve.q_MruW~l>cاEM7˻:/|}lYD޻G;sP"&VSIe3"sBH% C8wTRgy-{4SF)}_=*y4c]n9W" 1H*J`- aӭͣ&hڅ}Ҍ?3#B6{C L, &U~F5b.$z1X(JRpoRn%ү3!P3ah nE۞I S=PA(afcC"qEhת4t,nk%h󝦑A)aϻ(`? A!5WN 4Be_Or 3(z퓰 gAAl,V{!@@&%5.4΅vo`s2S9T|lta ^؛ft)ma-R_fq4@B@%b#d -?̥ Af9k.q =(WSks>bb@{Ks|@vsH }Q"L;מL;=Y@zMZlۛyG&mY%ԥ#dan@.rȳq. à>Ѧ tXv8d[A8S-=P:`_dMm#λ—JZDJWWfo5#O4/4iMf~}#4}zUEM{c:чX}nkP"%=zJQ- !ѤAE;q0q/37<845Lcrs 1|wҽEǭ킙̀K1'XFl:muO7?.W@Ct]i ן2x I#FGDc}auγ1U)l{f0B>QX._Zoj# *]j0%;.~_CpwRҳeGafEv7D:_ØmM-: UsӁ gNݳiR1<rh9u R-E9`P&q.BHmQSuο#B DJ@@rTY/>G [#QanUS[ޫWoWl:wO<Ş>[ `2Ύȃ; 4oRԗggKH@  ?bʫte^ULKƢfs;i"1ˆh85_SM6 }.lo\chA/(yZ[DjԲ-PʘrZφ (/>/5&,_!?+pI?R}Kpz:^!0SlA]JhS:GEi n ob ם\TMߕ<͑7р[ ~5Mt"ۢukLl;rJHk oajDD sawmh"~@N|,fs[iϦ^6:=&I8ƲsI\*}rq0l%u6GV,?J l`v[nO`2yF䡇k[p,=DMT;>|4pЮLe B+7|Wy+Ʀ8/b^! 7jg3tzs-ǰ*Sb τՆYvtHYzg1wZhka0HAnnl{Y4Koi !Xbg'KڡzdZ}BH_S(! ~H  Ч* kX2~N7Zy%JR:l /yX[n9ܭj]C(n/.NQ4 0k2voOfRfYksJJRLܥBwm9싇!8B|dߡll.xG|鯖+RQ,6* if@!udш?FyG)񧆤@BT!-OVS;:OÜ^[qR,U 1SA0 ɧ7\nyE~!7r/$/~OOM 0-VGL^oW;<\[k'J:u\< pJqUACj7 CX"Z{';e;Z(b``>U'S\連 `ۨF(8)~kU‚?L.f}~bq24YgGaU v TBQ[ac( ؋S%?ps $˞Mr^espYy$!(@5dx۾Qy5h׫` p2eZ>d!5$̬!|"|BiԡWzI|8p5F P TٍEiIOmq>l a$/Vіr%oi6\o<^_WX=Y+2Qg%iq`񫈬խSDɝ-TP&ǭk dplֲs1}WuǁVOr}Uw,Iklͽ9d"iԦRuA_B1ReS8o*Af|M9]s/nV7e1%8m] 3]()HJTTh֚?u^f|wz晟tTOcOiOGM.=P@9 ¶XO G\s;bi]:NT"Ąjѿ ЃaXYεԔsсAXx1Dn^q⡎sMbg"C@5`@a>T&0Q],TxHGR swvng@X&G: ~V=+`h$>1ٽ87  6(f*A5" SI`jPIF(|JAtZ*V1j4CEySwo{Wa@@ yVd_| -wl<=R`;A,d6{ir<':fBdcH/INn PI)N`j 0㱧[0+信"n>*SSKI3SMHx1!^j;t!̌=HqjE3á{ Ш?SW~by5t 1V@X9Tzm͹W:yy.fF*$M)HEZSH׊cy|Ը/Cm\,ֽ3k]Or7dB=L0 Y˖8Q}蛿/Ԧvs[67ÁGrXMï4d렿OBaic`;ݩtGڕBKe5.ZJB_'@tVB ݫUoCP=B_m)4iԕseBޭlUA!dPkiT2)>Q2B1D |fu%M&=slS%}Фҟ) p9CE8KrYr mRΒf D R@%B-b!4v_*1D<+a*eg30NDЦ&vj[:0¯` QS*B$o؜*k<_Ǣڅcri;NOIy`"S:J }~+?x{~:hRӕ_0QCfUhuUu+Z?#I_!ǧoгwzޏ;HIxubM߂oL&11;cKj5̎sÅߦ8Şx̀1w G, -N Li YA},bor5IN'P]]?5b!D< KǑ _w$%QX^B[?D0n>!]IqPz_BQW뢦B! d/M R I A]9B/޷|^l.zq7rgL=LWzl%qf@DnrO@n/?jO;Yì7(tLܛ4T͏5nb0qdrƨ+?w{Ƃ(WJPO+t]0C8 $i4P!**|sWz]mteh i$$&?tۘ- |yMW"@3%d_{<ZM~C߱+DD,6F si"\Gܨ"M޽eݵgJ&q`x:vHe>!+Um&Bu*߾B^;Y.ZFkNT[U\IZ8*So6Rˆ}v)1ш( gM@x;Yت4=LjBx?Ϧ3. ͘G~ ~)7.GvjNc7}{@m1|D@Orj6QDn1|d/TQJYҴ/az+%Ůޣ|;JDZoޕj#%!^@C Ұ\&) 1'6^4縦r`se-?g\87SЇZq)iR齓tEm"O~Cld}թLKP@ OODLy$ӿ^FcNFA[qk"G85ʨ-*IE#JϜU|^ϡ[F}nQ6?Z>w5ǐP-CX܃#$م,~ 1s`ŽSv@y]uLU'B]Ux.wb0d_W /*]"@և iSZmY{BXqsC2Z5R[OP]. 0cy36w>g6"@=x**A U4g񥄂}mi'X b=bukGlT{rj{с%tGn0_q/ϡj)4B0ꢪTUIE!PT *N~oժF:1UVѱ)kRLYd)AO QG<>,Xx 07a?^s^F 4AأѺi Mh9M!D`6-H*5eD2݈ n=]޳s !""!"" H IBIA PCD BA0@D1 C DBB =OwkWK/6MrUsa&1zzs6ʅOչ| D v=]OELڅdv= lix6+X]ޤ_O <ՑZH(IH$$(RPB˚g[˯XIFtKn iNQ[pfwд=S!0?zl0rmTܢ@*z{IG@UJ&x:&պx:@NgEGi ! Ks~鈂XP"9ѻPl'k_{ďd7[e$%HͰߒӂY8t,ώS]48ØO9[3TAtK:\eUy"`8y{] |N!+*oǤ+)MITмX*ڥqZAjMɂ\t wGR]Ӗy%P@,>4t H蚖#sWb!7VfV׍ra^ Sגw<8d ux\V+nJ`BΈVQݦfR F~"@ N_("XL&rBB$5/jx{&ptL!Xf*8%Z`oWY,F0p^-FWnPLpr Q~*~Z@D3%UUH$Ig[Bሟ0xx㺝ݥ4Pq`뮎#-),'&< c<&+˥Tlx288P cHe H%| fQ&6wK^46!~!Ϯye{<>|eTk) ~ ZݎEi'MZ.gloŋt );5Rwd/$?6xs"nXꝋoNŪ=HJQs:Y+D_eO#<pW)(2SuV`$`QAwxˑGۑ;gҿ`k2̻F`ku6v7WG3٫355G eL/͈ @ǟ/o8=W'3chenqeG +1v(@G.ԨB2W{7V^}nu|ٰ=Ӯt߽R*=!a׷JYDfvqi~vOA|Ԟц9+RmZI?[(3#%rvj潫,*ŭ_wr}w`Kdv$zR3z)]2̼d?iPWNFQ+EܛtGn*{$)iX`3煞h9JB A4Y(.:?z t@δ؈_<: Qcsɍ$|&AsbdX[;yD"a(!(U(59_l<85K/AV =yݣ E@6&G[iiN!? Ɵ@ Y6t<4+h|ETY\ Y]A Z1Ŷ,,JA3mdU+58v\vݜHˆjD! ʐƟz0Vv]Fg7%XʇFJ2(jc F|f}$Ka LB*v kWjf4m5[׸KOW=^|l(V`(p:4j%fJR >4{k;Xd\F>lxhW;V,v/pv6.DA /Ox{;1Sٵ'8=T~ꛊuuIbG2_\p'W?COQ,lӏ߷n?6gq1|eICLș"[uoֳRp2t@|YQ%s 7(tRbBNjAP{DU}WO T0fWx`~]jV 6$T+'[.B/@?0N/dx:/{]vK0ȇ)|gڦG ޕrT[ث"X Hxiv{uiv=L|gS eϾ-Csc&;WF|mO`}~뮠Ni/SM'XnV2H'j|dߒ2]ׯ9\q@b i@SP0 84<Ϥ]T 'wfiPkRW8lF[*߰jkA61U4@ fo{)ٸ -ގ6?}ЉGQe@kkBWOQ\T> F|~"u.p4Ouj}"^SC(C KsG*qWR4}Ĺ}_ڬǵuWOewfV|ߪH 7VDec 2aV/ K܈c84ep2ͷ/͚םk˳+w2r5{ƟG-|7h{[\{/>(R`Z{旸fL:F]{?m{9{f20ZLiAcpR$0HEÔT)H\[ALcT{eMyi8WnCW$E[&ˢ-d+'jR d0 U/AX7 6; m1_ݵhk]qpp%^>2@ U)) PF{;@A'JhċM˲ tp^v; [}6;K6\.Y~3^˴ )}"$?F>ÓSjmUW!ݏ.(fcBjyIB%>_эo`_=AeŸ/d97)"^!|F `ßc^Gc}/SI{ ;w?s)ewNw򞭬\T֌j#ev$f8Uh-⩷N[g45L8жgd$:R1q I*Oyr`l;£b|~|z)朋~i^xk|KѡӁ@h +.@0"HR |Q?hE[w؝찖 eJ*T{p֣f/@ ^. Spr&dΏwXj;x5ש5x@>~M;XECHٙ`.~O3".t1>?tdY 'lFaSK'7i{P%Ö Nԛbt,KL_3z74uNA6d>`E~_0P-$$#^*!Ŗ&t}_,w$tan( zT7G)&j~scB ou]s,|?㨖8suu'Q9cmIhlNOseRP4 n h96hjIKQ=b Fp?j7ϼ4J a`Psԯ&#E3J 3逖!k%[121?Cٲ1%{5܊(nȾLݤZ߹ETDD|xd݌+XqǮn.3]6;\7F'E>TsNSLI!x2w(tKٽ=L[ P#N=jT ZSdk  V;&8LB3ZeĒMU4SY 5a>kkx^c AԂ}&MMugi j_>o*U>ʸV8aK@x@oƚ4U|s`uуls|lycc|1O%jsL^0P/9 57B}f~pM? +_~@R!-whj6h C[9ĐB[5a hiXhE?\Uٽ2 sV-G&ZL\0`$ vOgK_~7iFp@"8k jk7qI4G~U>)k4@C`g+ˇ |v5\|=ig2n\v^1󔃵`u>U; YV\J~J>R?]֍b/j{X(_Kiܮrfyg=.&L[bIJ@QV#ϣ(!.x|0$~ JPS׌}b@?gvtN&g|c[l>0XgN躺=lo`Z`wKPi j!C B8ODYJUh%Y?,[wBQymKGf/hkIJXa ) NpQRDw} sH]QF`edz`FHuo#JsO0&7{P*5nG^2J_Lִ [QUM/Q6kw؄A?-]601%˙;^Yk79Ok?\qiWemРt@p_y_/_<Õ{ij^O%RtٕƩF!ZFw9s>&SYvqne `b`! b\ӕFQq|Pշ)~ Csz6ji>'i!=AniVs1(&͚͡/#32gV]?ӳD_ꕂsg'-82I pb^bvd!t> b}+ KꉕQ8"/U7*UErL0 C籎$֣U58Ј^,^@J"M k6ʘR q+B-pvU%ԯ,T55F2f}PH f|}ݍ:yy cM{F:s}%7Ay `lEavbJciG:i헞뷒ޗ)P)`D'C];e@ؒZnִ1ƍyy5mB]LEg{L}\zXs<>NoL+[$[1H'$K `Ď}X_74d$@8*.\nL˲|Qv-~fP \ë*ُs ^81ڧr冐Fq$aLAWgU_88="=v|[F"Ȅ ]  i.E6>aIusdؠ\MYVYֻbkG>YěmE}J-=Xjk[KХ1-Ct~׈Ijn8s!6iI]IC9dϧG\Œo"B/B;SqP5#*=> lM!8})sHD>qaV*Àܹ5=K<gY6bB&X?iv fŞ x)μ҃e銄y_$XH05Iչ^Ρ}0zصxGD@:jt*ً:iANN. 23^OOCī/ÿ.+dYc9]N=kIHQKח[.:҅_eS#}U"8d3"2Ɔ@߻([M/#^( &5@V{2JHS5s%볩(L *kRtZVky|;R?x'0|["sZw|HS J.QƊzR˗ïxc[Y<@T*;>/i !w$Or$ƟCh}xXQͻ<^yjo?4/-f`+N7f鶀tls-zeyKDцn/=pW+k>]/sJlu W P{HC3H y' c e9^yeԿ@Ú^Aϩ̩UCzS@ަxZ(_m/ږ 12TYpA^)\S*(zhT䂿=|}Ϟjd26NGڗYĭl6j5윬sF(GHzH0&GIY}Tw:?TsC=8Ys-ᒺ4>^FH3baU:FMdB ߽]&f._5)ÿjcnZ?CKolw.[յ!! T3}XiiB*TpPu!ZDL]ǀ9jfX;$?p,K.AK0LTPih Wfe+Tml\wRȮ?̣<pRF6N\.>ٱphD0zu6u̒vF^xAR QZlS5)m;? ,ء㊍t*.(2vB |RPqOk ޮT!K5{\3YiaU,k+7[0I68*&2n,QJAV:cV_v_>EP_Y]Nt%a) +iVA,ƼEJtbn~ES}BXߧ,\w23Ja)p^LTŰ:ǁɽh#ؔ4H4HXPߘ>cYi/ % Sc섳+Sn2$V:߮vmR_;xTn &٫~HJ5  > r5~Bt,&^hz^45b:{r\ e-ݎ,+l[:ݓaV![@_Y2I6vz-Ov- P>A>H ILd {;??Zot R `bM.U[zU"O}8g n &[N6z J ΂9j F-y+oװ5[Ԭ ae,!dI IV*)`j.}952mnּq-}]R\|AK>}N.V 71 iRLnB^ URR82ȉ]fxZr~g2-9Y:P:q'+Q{ 9<oȩ/8g?kmU'Qٷ.Olk:j|Ԕp|kAp`9d| @E!»ʑlWٙ<_dݫZzX>'d}"yuE$NɌ=S^I~PT37{~_Ag&rjk'sZ>xyQmU|`d#]1Td¾93m'Sxe hh*j}]DS=q-(M؅YW,hm˒H xےW"^-6P2%z>6Vϋ~ݑc 71k^Cߺ|CtkX.T/}R|rNMT\{TxE.]lNv;=9{Cu䀝c]4j3ex3W ܲd@"PbbG6dcxK@sڸURdmR!! zҤya˩N~TUKj{e%[qgb.8z\A.."{Udw}6yGd]r1A -&zn2vJjat #9g6B2޾C ݃o*俴V͞~#vTwԠ @'@ Q,%ڇck+aj#v0*qvm Mv旜PJT|?czEeQv3~Ƭt?c^ &s#5yJz}xR }{d-}e>QEj-YH|wgX:Ym\# +<&튰)hT||J~hwyN7aԖ}oiIbLND?2fOh^2)99Ńot֓_ܦ}SsTpk0Sz)$P Fqz)\:gϚviAnw>( nSF$ՅBg@ |E~=XIe&&U%߀lcei29D,E]@GYm]a[3̘gxc! ;MU]ٗ1\S4ͷ%qSVŵ?V?;UZּCP/&joL@! y]v$ғcɰޣs^+woF(r\*71މdb om"u.o:~ʒhI~E؛ZF:$9= /zaY5Wxѐ"R.E%AWC:Vɠnڧi?sZ0?Ixt%թ1P(q E6`.PZ'K\of R J"I{8B9C"WBP[7ˣMd)uqiRFӣvx>$ xo!6E1 ï@̦ v4W zhM4=mpy|‘20Kl81~AUIG.i@ԊR/W2.jYy*63Ōwu19_"3xJ# r0UdÜg'.N往RIuBR% 29ʟU#S' {dyXYy=\i(cm8bw͎@+r Mi=KǨu /,tdnj1v:X-Ք_xs\CI'qx.My?_SLTs֚'^_og;ý_zV98?!bb$r\"guj5d`ys&yX|t BV}=X*0')O#Q2}M#)LTo)\|V|j ʋя?_jPlX25@Bآ*UlwwcեSˢ,'n~9]WcCXC&+OͿ6.l!t1qF:N:F \ݥIP[l YE5h0XA d!@^4d t?(!ah^_* #S; {e/fBb?0F0L/ [IW!8!.seLegt#Q~~NU/1cdo)+6$ +tr6KwV8,Lr5V#![!]XX[tEhpOpvsl/6$T*1`a4ޓq(=͕V\ tټUj%FLoZ=UlF\=4AX2Ո# ة)-Bcg:t{D)9>Xi47?^-9AMjJlj +z, .<'4AopoG RV4 ]D UEPq˹AKA>/>ߡ([rǹ*g֦NdھpKT}N뎅fFw){֤#!HiFIb*  orW{m1&suJ U+-?0+)pq IŸGf"9ĥ5_+:}7ƭ5NVFp_܍$SP6E>Osz|t[72Ȯs~ΥL HJeIsj c 9)}^6wU+X}gT^D@A-s,KjU'u7TLFtx^Gx߃MHD!n>WSYsq#[R8#u'S. *k~, ٫[nch>4)֊SqF6Y0Ā6]OLbn{>H++p ED%]lR,2G-L^.[1{+%Ihu@C ]1 icT''Oj_N"zRoI\e_ULҹvj|4AToo%oђJZ8~\}qqQ9 r#k& zP?rEe5U-b_k0sCڰLt=Q-} bZIX;DRd7j-rϜ{@}B Ko޻Ap$"'qṀзԞwpP!eg@R'>vDDȝRO$A$Ǫίt'.b7ǫ$ˋ6_|ϸb1GcM 7~/6fn+{~_ >KS^[J*E`I4>"n4FPހ ]=Eg^"<Ι: ^<@>z7g&E2~cp/y9|e?]J4וG=^2Lv ̏=.9*1*j"?C}v.@Hql;}k7 yq櫹fKȡ<3ڻC_ÓOb~~KʫxwU?qx/O~.JOEy5@>9Q$4yl*}w 鍧\pY3〔XW\a~w 2;ƪDY2OΖJ oXNB@o1'uJgxG.[Vm 3o.$ ^HEx4xP_li>#QwςVU84]L!zٙ[>i~owOuVq^: ^͑jiPĉ,}r[ƽ* cqħ6)CPta@!@ht㫏|h/HG.~A9hB<5@TU+ՔC-{'ɒ2Ǽuތ!\tǤx'W >KUZi̪6!ށ~:̗_+ۤtVnW^+_< PB_nEtwҩ۫4J\쏧nM5R߯ P?VF" 4_Nz!"~j^`/5 Ώ2H>͒&j'hr7'z"LBn5OU:HEZZN*[O2R`!`=vwW6'/ؐC+V` Cyk*NoO> dKScxdqx8| .:~ϴ G}%mÍ5:t2G8Ly!eNz_]hv;Ȼ>Ǜw-(6azB퇅A,%C<"ԛLɡwd_+2 Xa9E.\ŽVh&`B^?UaL3xS:?#+~!A!\"7lg&=>a,@a?~Ϧu"coLBLFx]u/f p}ەd Sؠi<]_ك f 'g3@ @c@j\%:'s/kߋo(Ih4j]P| nc-o@p/p!&[)L·k`~c wZ{U~| Y9߀DtJ+ rpqi_9i}u^։| g!tSIF{dMGPHJ)20[vv}"NstU.vU84W c`?۴۟E\f\O gJ6mBHkiY7-LBWk7"!j\,"'w?m 0di*pk*84Ù2g$6X2?=^e#pOӍ4q'W:KS_hitpO/}Tw mutKcv/W--bIR}i^㛑Ed}L HO)ۺRK3ycϥp4c@{1\/ [bmkir~TĒ'T/ڣ`_NYFF?wtm(])g.NeU\!=1Ȝ]2|c6Tw9kB3cG>k}Xq>ޓD~?OO/&:. 8g̫!1 K]iwfםT6Iۦ((I3oiĀUis |%~@2Qy{LY)6,?zSNh'6v *ٙ$rGtKHz@x|7-a?kޏ`.bx)vƟqwG}"У tW"nSk(=jv\:nv_"c,PL[T[-^;lҲv>B;!΂u.`64YpA}a@HX]VRy_+=|J98S,lc}ݙѣT Tn!ӇzzH|701ʬ7Cd0s`nMbu1{b/&q&٢12otΙY9hAgamEW~GGMRk)J)|yk5IJ\Ry*H qr.]f\>UzՒWYoԶEyklMdjpna,,sVUv~E@ePkt4KxՙYWB·[R7l5,7Mx spR`\I$}[otsws(D ³2ENS5WJo:- [l<3q~$n^؀DQ(L٦ NPU 9 *)i!Jqy+gHj4,"o=h-9)۴+dLyڕX8hMT~SN~R"G-D[S!^dub\]^ARrYSbUd^sg ˸#nbs4|X;MnswFGß(M͆|vlmLΊ}R3Ntρ?:wX_:菜Y삺{bo{m}1ukR6s AR6Nd-?uT8UPVI\֠5@h%O׃<=B"Q()TX}w,,QV*/ぞ={Ymj/^(n^7Ҫn IABRRGw۞J7!DJUPPR%!"DIT*)^Pꪩ,*H;)Nt|BU/V[FAU>JR[dUQJ-M|MjnbmnTIUTJ-lmk6Jΐ({QUR]w^!JU@D*V5)(łFwH Uy06 yE"RV֤=B{|"m}} B AQUTu) mDPJ[0)#nEWuB*E.zh(JRUT$R@HQ-5;Ch$ss{TwJRT{*ևQTEU(PTP%TJ"ET>*PK}%URT@t Z>[ cݝzNv7n)|V"h 2dѠFh h4h4hFba2iLM114тba 2M" `0C #&b22a @ @ @4H&4ɩi$JlS)@ SFSoIOGg4̆Lh4Ɋzm ̋)VJLZ.Ǵ (䯭#*9}CDr1urʅ4UFX5ժd4U~)]`S[1[$,MGi9XL . 3rYn a*ed붪weQ/E TVg'i[M=.ԛ+e|Y-FJ/+Dh)cLJ؊ %Fv6KmH@q}miA\`y[j9plIjlExH|9j) %(la:@$>NWd%(KBTAj [jGjcpZ\Fx$=4{݄p-0[KG>:E8Y=:s b}[GGk;eϩFɺou|KruZɳߵ?[mߺOɻ¼8\%цmipG8F4oͭ&[ܮ9H@IyW-.+]MzmԈWo >mw4ua psqtSWgJml1w#|IwtE'm0QLih[}/2npwnK;}N#y5f&VZeAz)5)5xrݩa :ͮ$ i#܌$VM#3som S{Nѧ7GF+d/˗V6 6n7rp8ͼ_D ~:`k{z 75:xx{տyyrf y9ݨn~Un Ύ^G wJh۶psMWFfwiѷ}~crw{"okj[;GcE]QCQbYsayUqq!"ב'SPб#̭g>e% 21Hzjz1ɊZ$("&Vl8윢+$cW[# QXCTlt+ʻQF+Kɧ#]&a)h,h%T Lf"S jedڢ=G;?D*}q|ʻ"+夒$S*E2M B)=Q|GT47R1S7áeٺ|)7PmژWg2걹uW5XvSsS_%3h O&ƸE!Ide_r hQjf 柖)[3Um'c!j#JJ61F qOxQ (U)uM(hyDv!OQXI>#.ԶGiE̤/O22Ʊ&e'#QgRQc#M(dz)+FY8sOLe_4N9<s$吐N<#!PSf^Wf\;Rb8Axe{y~h&%%Ju !U| 6+pk2 foؚyټ(=<;f_4ꥮj6^2>csgFJ-XX=͵J)*r̢(4цAcWѸLjd-/As -wOؾxA^RyFo}VŷQ:e5CQnTET?/ж%gS=s{|cb}z9ZtU3s Y_CW-Cq10퐓ͣ'NѰyWwKO''wgox|dw vہ]5xkԺ4>2 Džؤc{UF^ o%N\/yR3Nͺ.~TkƒUĖS9_{ j7Zsv몧3R6,qV=~صr\8o S%Y6)Mbv|YX1Ϣpi%4O}&O(f}ZʻgM*_NiEvpyqJ,`cyO1cTym[xmKm]y>{Aޙ9F%y;lLZmܞTj" n>N%ərF&uJ,Zi~)ޤ̇Mo{UR:'*~ FW=zGq;.L>5Ŗwyjw{9էhrQem \>P{~띮wܷ?ouǪ#Z5FQW8w%<$ޟ]{QiߨwvIp_ =?+j'!5P?Ow]ƻc-esVԬ5fw;/_C]F߂:+m3 Al`O,qr9P~.몯}в@wko3v:1kVʻ?dJbP|?/^GB6{6Z5CNV~I WeZrt|TnnnQ*9UNO~}M7.^WH:)#}_-+Yߍ[¸nxPpyWoͿz<3Uм q vzG#;3 L\[.+GOӬm`QքAbKSTZfuiG.eR멛(c?}#ktm54#>EëoyϮ-·U~ݍw=zݞIA@neFQ0L־le/\+({&T~;O2HqK8SU\g6;ⱸwwfWZÙ\2t5ˑFk3:-ETT*Ÿtemoڻ3 XXh~ mF*}O#,3}6-~L~"IN-&PXNͻGk&{1;ST?NWs(R}9T{ևƭ1B0iCYJ(hOꬖrgp5w{fsoh_ޭfE3ͲqQ%8;t՘Ua1z-2>Kkʉ'{M =0I|u_jn*m|\6Ua)VVo;4ټޡI*7κTMgj}o~V~<3γ98-OSI"ZӶ/WOzǴR.jSq'U v_k.b Sr8-# ݬ]mZ ߮s^;PZ eYWa(ٷ~xqOjKY3)cED0Qj`7MC9ΟVgCxSlp7Oe+̓fX.]vϋR.1hm.7g:%Oe>{cf({OuQ-|RdwG%>6-C~i[R.*t ^6 nRsbZJT(VvqTVrrgQsu{ h±s7HYx# i_[bpu+g#IG\i?Stt+ܶJ\c4>zqxz+)Rj_k|ݎ߮/:;GmL>7=+4|N73c3YβnR>k0+X1B{_or=tOw([NgֲG1ptub@z{Dλ.Vux44e]CSL?W*" :ʟOuWMo+hRҸPcU:޴!Jb|{^y \_C߄}ld, ۤg2Gfej܈M; =ա>.`iS&Ҵ72.dn}=xmɭ{kyv nz3sk++T)K o*_u~\=Tq|rNƋw>C7_xflUv9xܯA UM |/+:wOf2{~iYr[Lal$I&P!|IA ! uV.$ 5b-%1̄@&jFLD24#fTbƉ"dA !a$C#C$DR0BD БA,@f(Fd J@HbDИJfC L2HDFDL$e$m14&0JR(CH! T̊J`l̠BȄ(C4DH13 IE4Bb2d mfJ`ؐ4 LfF@$,M`%(2iR45$A0Xȗz1(=tR q;7wB}Ny]!ku:;P'£=^&B~Zmnxhβ\IYiΔ+{gxny:㜥eg;qwみ~|W8skƺyBU:<̑cWqh߾Fծ^c,m% n~5;idq<.k߄׎۷s68b\xsXoim<󴮮{|Mw>;go[+]ZW{M;%kjBVR떹:1m"ۮU7OkEfAO1"O(jZtUb0\?vgBO[OM2(f*Y"D}<,>{{T*l#(ߚŬsŁdJSM0M9Sms G-fN[i$ U,q!T40VLc,4sW *%E]F`VtP_8Hj* j*)E`)G!vɖ"dHD즜qZeMWx8 /rwq˥F{0UI e j|N s AV71ŎXmU2Ih%Fb#U C X!Wm$ԍ)ul+0F s^f=UMmEo=1G!z/1d(Ui:g7T6yYL2q-V4ӮȪ= ͕uxBy+]#ٖOdMז3 aBDEH#*\%I鐀ЌABΜ-)K*y2c}5R%՝prkbKpI\g0M@_^Kӄ΅kniNbw;"}eBHJHXg*xFj$*@mj0£3x_I$:9,Iei)i4uQH *$TkgmN|y&..O ͭյXx.ڴiIvZ8lhO}_Iߒ_f_.N*t]k&'sG[&Yh1)Ͼo(ju?<6 ,m>oxey".~Cž8ͼc_<5(رmrkٞ8)p&5 z|"̻U*ɩ uɈ鵝ꍂZ$r3c9zl=GZ5RHQOPX,[6Tv&!320.dн~Mym!e0Xד̏Vpp4 D?DG$_~ӖK}H>Yxm\:l%w| {jK:TknqK[6Ըߜ-Jq/J/l7{柵I$Cb%hyp^$ m $$$"IʵW-[hb\UU h  $,2T%KLu˜UR,̽.f`s/EwG{n8]T & %-X $!25@+*Фl3"U$t-B& ȩj^iD?ؕdC6!;6"VqT4SQ vB7SO+`Sw֓R ݂)mҙPFx)wVɻVFXX6U ÂW(&J f'#!+kMg28g2#Zlfzfg,: pU)Dے2&Az(Y~%)/h.m,MU3:[>[εSD# -&qcm"EilA! W)M<̿{њh:z 7ڤT3;O~7+O#pMsޫeζkY\k{dWavt<.{w;y=qP?:S,o,y551]']tHM 40, LmXDmET6qgZ]j /B%$"B)z:Vw߯(0iVZ1T q%qQ(HH4L׷V^teb$tc$ŅE)lE@1wtN$YNf!ET ]JFUTX"w/NwnItAyۻAtۇy爗N:q܎sR0:W^u9>Gދ9/W0o3 [q>I@I |q!Z O) `ڄ8D$ 4 %.1 LBDabnIN :jښ-hD$M2^dbujfn3rt8**[&-7f"IaV-ܻDZjHJuU4 6[RFURpiJ̸U*f5@ E FZeaE%3I' +YnkZ>>QNR/ C$ e9ŵB ReSHsӼ,ZT DL@#'$n;<\sD-2EB1e )kPAKEa[()4% P%HI!'rEj{yjg]-=u/^_rH. dL7w1\Ӝd7v"mْkbAb',PId$%Q4SM IQD-OH&ɭ$~$j.dg)ʔ -tnRLTd0!*yK5Bҭk[oAGF2L'=0;9I'}&!E ~AOku:O$ Вpa$H $V5lmTUhնjѪmFEVմmZ-mVըTjccV[QXE[QlUjŵb,m+QhmlTkm,mEAUj*,UEV5QQF6XQmjŵF-mFض6ŶƍQmlj5Ek5Eh+hZ1TQU`#lQF-T[2b"ѴhhŢ-bl5QQȖ +",jƲmXTTjK\kEj&-Em-cZحcmV5UEr-[lmT[Ej"*$PF#K>>gf>΂7=VϢ2~,Xͥ莜or(]@ݶg{W>;NY}Zʆ5Ҷ?!D !F ebhjxybX8 \iN]MC>V$UJY y}x=˘M_!dѭ͓o~E!_[y_j5ddGv|"#zjx52Ze}* ۬8Ox@(L 5`( %n.ܻ&.t[t㻻#9ӻmTY66 Dj1&Q&$#$!c&H (,I1_)So yO[#↏oH q>D9=Km._ J[@AuĻӯ4"=e?~/YD8Hm m0TRwuрG^qrWFgoѥqUn*H h<PiEqGG_S#f`B!EQm`ɭ&AM@@ Ɓ+pe=JJnL)ܘ)CF0MD0Xin o`{'wQƂ`@ 'u>poNv?MuYma%R[}aGDա\ in#=D ؈AXضw~{>zZr?Cʝ/=>bDi>A;bT޳ %s]W39е|}N+#qL 9F8ݓ-7YYvri\Bg删*`DEzG>O߆StV^΂mťXK6tt^!s,ed9w{OՇwwN~~C˗> t]bj Գ%hB\ߡӐ3s &+2ڻ#ݽR#"r:cʖ@!i&y:t UjzRu P$kXNٞ /飊HzH$kH,7O o[>CyuGh.<3c{m@8xJ6}jF+;jД õki@´RdihZ辯yw"ri@"Hj-E7!m$Ƹ7֊:~2JQƢJ\Rt"JQbva0'S">!h@S$F>jӵ>~j?9w|˫Ɲ8'ؿ~Si^O#>R": jB&3=~9lퟬe}U1pO O[LuVfٝ%8BG'pQL, v]Gm=Ʌ@x!9ÍRF=!q^F$6wbTjQoVݘ~,,^X;V7.t|V0;SZFZ NzL7. ?8(cn~(7 6QBe3d[3zd(S1Ύ-"h9i(QS"S^UzbTXĦUXqʟa^fR ])֡AE`s9[I%kפwk@ R;8R:\+,F{M_aώyO},<=MPEb*Q# R lգ`e/6O`rMU 57eاC {V XB& ljMswU)R; 6EgˇoG5 m߇7܉=[8"Or> WdAˈ5x!O% ‹Iew#'y(kwɟRJ&^f&}1\,;͜P@,IWs^1N fu_G7}t8p >ľ4Ky  I-oROcq FJݔO'=ю"^E~Ђ@r 70Q%Nr[aIEe6Ji4t]JZTQ,-|.j/E7gKotuS柔 TA"yn|]=7~?/Nj#-өD#Z춽MU@ wJR.m?YHWϮ*,RR 7TTo7d.":D ?,g $VUMوL _uF9!E_f@Y4+~#trf,o&cț`U'z0#!|?_N $'V0iB-G]mRY)_jb1>i,vwh ۲?vs{@r/t5He2kLd7jZs>g`З2iWHE< ^="ֽ{y׭1p=hZ{6J/Z_?8zؼ$ V)?y\^\c+Y5!@$`k@"k; TۍOYAV`B,l֡1'#Sj9SɸaDɼNP<"u[¤mf WuBQ16ԖC|!#,λ\hc[q^indL&{km_s lC𳷌*hT6ng**#ɧy׽ǺR{ Qw)1 wHQYf4^>u7ZiW*<)>MPPۂ"lʯU|lr8hgw5Aֶ!=mߌ JX,ۓ^~YV"łN{>1:`aOyPKdky7BҤ @`gfrN{:~Y9nlˣi!:Zs'b=rcRƏOj1».ْdɖ*"EB+Be^#F}z}1d%hXZG ~OAjK]5 {Q澷g'5C}/z?&;Ŀ:5+fJ㿼 x}gj'r!m6Z{ǒwI`IɈαQ1<&?ߚ bgpUv>=}e(>?yX8,j2z)C(*Zu_~Ǜ@3ȧinb훹ADu! 3 ۷Jx'9+7+@pp2*܎OȦD8t"ۑwx<(ԻeLX`r $PVb_x ,'ov`nݔRhsh 1AQI{#(d6U97ל0w` ݖ0Mo;E DM|MQD 3F&O,3fci 9! EvMaط>gKGc_O`0}c韯:O_"p{/ J O_uͤdN: /7;x>ozc Ӧ3"eƸ{ >k '6LVFӛ"jatI.X[T9P[-{|m^?`.B63[R^HH4 B8yd?2^/A&F13VxMm A Ծrc|_pdiokMtMCBn7lShG9UTS/P_/_[\CpceocOQi[OBtfNY9އ2yY]{m~$ "Sj>< /+ߍӷ=кM~g[N8]m} h6]*!-m?VYq{&̐V^m}|-)mDڥ-W7a"2,Vv~6 ;8\NUeSpj^J4[N }s@b!Ũ}=&oןB늞Kt[fHCrVxi$~Pո\BwWU\ {oE%g1K3/4xFM4~3=Þ0نEoˇw=ϊ2 'B%.:/ZwJ L˦pc3EԒf TyɈoJZl ~Jn19qiݻafGqALY5.# <V]Egn.m㦸b.&8 fbT2[7`6c*IK-4vB@E4jaEψJ^"+[{.}'_&]!)궩QCs $ZxC8HAJU {<,C͙]= vtc (nd mh Tl`@4[nǵ!z/!p$AdGzg aѝ 89@8q ҍ-SCtq|Vd 6R !o奸I_.x#rfps=js ;U=RU'2K\IQLiyܮc 2;A*#b_{FYdag4v%E2]Bk@E;3 *x _y$ĢKF┥|@eù<։B/ ɺP7xqk=g?he+nr%C GIӇkp#󝴰X_򼜵OΌo^p]R9lK3t82׾77WIs,F\1z~6j)CPPA溂"s\iB%6VCW6_'_DD/uom:vL0WZ/ ]/<$Pnϵ*! ok KNNZӖoHt,8 ~_~ ĥ)DP((bfHb QlKkZNYSso73tt^M2紻<2b%RAS=$0&@{~ `f:Gdga_xA ՗O-nvAY1;dm?h{yy)ިו',ṖLala,JDpQ4z_O6FUr#m~5̟M2꘣;r񺜍jLq "9=q_OyDy?_m]H8h^'>b/"jyjcGl8Miucse; .MJ,䝠^1dOmD:to~^IJ:1yd8ә1suQT*mCKs8)Mqj<}sd\/{i#jf`hV\gJL5Vy$?m=477%-o6;#ӿgnJ8 t %AĴZwkF hds}?寥]Pr[ùSOzacVܞ94R:{9hʒz-{Q'29^~bDAߨ9]f`B:{Y)MoMb \p;_WJqT -@2,o2\.#ά*oq',crL|chlX :6lHHM'kibl-?gk4F"Sڴb]nQ`wN};3vJwO71ZʳN'}\:vsW,ǻy^Z*/vU#yI"T,L0L(L(0€ !=R|Rˇ6MfZ |.7=}Qڈh; [+k;9Fോ:K4CB8[lzsC300^f~G:e43)ڨy-Ks]48JaW".a,&FMC7yYx-mT]v={GS8tQN:RwüFO2oUlύ4)!nfZ$LLiGn*I)I#\lo52O+G קẄQs8S[~UuiSRT[lDj,|2QҮ:y|ᏁKL%GyRjA()v/֮J Jj$BYSqxa^whm{ qDĭɻ8@kG;lBiq3M:CwK%ӜLykBK%쁜pSy*3"dYOژ4՚|r,{_w}&KtZ0B*Pj֘TYYQJKKyHX^G3'c1b̋8ߗ9 .L~t -壙i-~9TQ(it.M~MOz㽞uE&fV FTO=yzCwqjy /#1ڱM6ۢ8d4Vڎ}f+dzzǶ!,UlNl ?/Xg#'*n 7Hč]D+oA[H)nΪ#иMGT{x0 =#Ǔs/*03Y" I_1;4I/y&AYCQtDD-?q[稰k:Iye$od:"W(CJ_4(5^>\YwXǁYtn}T9py܎|0f;'jP(\NG_ ŠI(} |qC0=- ߠj)~Us?>w8`[Ll5 ȖXP'Aoۀ}dN h6g~nFm-ZO[wdkNLmAg̱֐#0dÔ/CN˨B;-?p*߶gAz YаcQZ"&~2nJ_K#Ӿq }b;Ŏ 5OjGt&Pjx$9$-;B+#b@A[9y" %t 5~S1cO# "5{yI4B<y'!1qMjwXt1ڞYaS $_vH0:ߧ9I^)Yv(Ue \جc5RG?wVfL'z{rp2 ž=x>(7}Unܿ3Ikd}LH`d}HPelP Zt"`gܻ:$]v!Yo%k҈ȵ7=oT2qCt) KY{榛!2Po;) mO=k"yǏMM 2'ߗ7R˓2|( {^;T(#0U9^֌et[qr,V }K`O8?O3y~ Ο'KOؠN$M,A?<,&st'H@XwܴQʸAєIw9_YwxJ`|LGaA5kRgD I{y< r`:w ^2=%:+@K]>I'URn鮮:g. cG+h:'WnifY"Z| l62XP/Yމn[{ki#l%B(*zf|]XL!\MҁhᏊ=qϯ.}aHdTBP ) ܭ( 쁤"CW9z^˴>ﭒq+?V!)Ɉqf$u@ # IAmz9ޗ?#*lԍC?@7mH׿:`{~Y{]s0u߽P830Na-/}OFmޥ\H;՞IiL$iMztBMl6v-⟮@k?~T"拷>w]Ex,g7qG~!, rhl1'a FM0@j@lV=ߞ!ec}Pgd_.;^Gʚk2݋$^Er3xLѹZO6;!"E*8R`ElNd&AvCـTiDyan2!碻 ħ(FZ~vX%' =:PvAvGkoSnavкwx|ۇptT|y W`wl  yS, AY+Ah0Tdܒ1$@#0VL-eZ}T:q]N] '],AZIo[;A0Yd4wW85Voࠌfo4=C]Tn7ȡf<'ea>ZQI){'"G޸P7bpy?Wp|ufZ /t 5(HP(ç#ghjUbI:?אdZbC #O(Y ;yuS06nJnw{5-8_GnT%> xc5I(UkSY=ߠdXwuKInyD<l/2+젍yMܸ*.R5} \XV61jO192#.+OKD\ecQL`d54)y;+cPĿwȚ5"iNn;2Gm䲯 0pk fjx"hZ|m0oO}@8#J)r|Ҋͭ!Fx'P|GZV|-U?4|7/ϵEz/~Gw@iHa|$v+ui-BE4(cU<}(/s;TspYs3si!alHRWC1FΞ砝Wa/<)Q˼L#mRZ3C\NWjn _'Ο,qٖ]`3<x!5L*k(CH~~ۿ>u\ 73F~>h^<n|P|šRZ̳MI4U D2Pa0$: q'e+݇G|,;9N_=s{?F.t1X;ye X|u~TvΠ\Qx1A/V@ `:W]H74slDIm4DCn,? `O?lAU@OOVe+ R d?6pE!SNBAILuU9_:ArOFE*@B?sؠA2PUQbP~?{Os`w`}\ߴ|$ 3,E/-ʃ-6C^C>|?i?#_sn[?vw~.9I>Gɝ[E%='hD*Iu}osE{gX9j]d ̡zrv%Bh 5)%Dj2iK _Gr}9?3~0n3~5?-í=|4W$:,kOm:H P) ' j%#{g'!`7,)y(% z\>*_Xp#"9G$ەA\N8+s]ۜ0rV9lU|R7SRs/M_9*Eb5cDG"$5۞_|zɾ:P.ŇD}Wp]6GQmdnUT, #N܋X`>*V*q9G (bgS>D#&댁СI;SHsJϪ:%7},RNB%]xYp'zvLJdRPN+Z#A X7pQjZH$&~ t`1YP&GM8Kg(9Y.,$q"8qN,{RS{8X<ﲨ6; }?\[|x{t`K#J(,U(  ~|‚O@bȬL|q|Qp!G/]kP5m,f?z_Y_t8 JF DH%4q=&"({dD~~߱®vuVŻjGe*2KdzB,3[3Pu{n=[wwc{9-J`wkQIV`d(T `T̬ՕvŠRFIIVNhKi }4,)e GxP FPi󻬌 TB +/1{q),9bfjB \UC ,bD$Ô=[y~Kd38]8k{DGAShȽHl0J&W`qeDDKJUnY Y\M90H]6}h*XIw2lJ% ?2=CS>B/b\%g3xE-7 džTlSs}ќYX̎ܚ}LngVtR@pG_'t6/U˳}}P݂hv^nW\ -2w!Mqi]—~{&j Tzʌ*dLjHޫ;ݔcYPG!ɼRsq 2 )-|M̽.[of;["=\0cZD?&#lQC{ 69xQؠG8?y&̣{:8/gpa$:-WkXeLMwhÎ cSteJNy@ plRT(S>~]98O7 iں>}ލJ0D8eNR˜ amXdr何ƇINT! $xS%ǚ0^]K\A(H h%K1VbZ{q)Z4Ҁ}*/@m%NW Ftc֗9gj{qnh-ҫn2OI>E~AH>iɘNL+?͌U3~+~I*=Frnqt+Eb* &1!U77H0ٮRm' mu-_\U}GQ*P))^Z,9FV],}VyUN=ɝm_`ds $]'+54;(]WVe'>ϧf[9_< +h&<<JD) '';]$Ş|cRu(wiߘMsC{Wl2N']]OvyrFeں\q~SB#73l~{9WB,?_>ŏCp^*|d?QAF/kr-77̜}e8a~;܀q5EEx h=5׷dmo {I}ƩG˅Nو'i*opQ%J0Mg}3| d xalrI}6 x?zɯL;=[~u/li Tvm+ygfDơWvzi$4ygF}}Vm1o6޽K}bz3/;\|p:{o b_; ^/c|vol5>iW 1&1q/F^۬ڷ+ m K*_ʏ7KҀeNښ$y{~De~+mǰun=+KveϢr);O=)s׿7[EN> 7 a!~CW5DJDZP~1ffL?}o^їk瘛_n[it| U}K#wi=6n譼h3xZҴџi5z3>{W޴E,{]m~fwL">]m_bFV|rJ=ߚS2܎m')c>#ԏ_Cm(DHDD$r'iKA -WeAxAѢ #" H #@Jh3p$mAlXLB 00IE*Pjp b axA!Ä D" 6A,$)CA"P4 !DH l9>B x)"a`mH,4 i pX- I$c.naan*sP` e6a}P#U#DQG $ X@ n Sd-KlL,o?[ phx%xlPH$(qi"_RouǪxSHQ{9ާ-U?ebT2Y$ -U#tXA[MN;9{gvou%*z1vIۻaHCktOv$K\ѦV=?+j+Y!ݮߦ g&gW,L&b0X/SBC׻1Dt8FHB]_Ɗ,1Q`ߺezލoF6ދE,IsAxf" k]؄/7`++H /9*Hɶ65+1rfOOK=5X>U"Y5rɫ @)ާV[VS$P" 5Jd 8$K,Mz* dQeSX_m$RY R.d" ƨ4lj-cƢ)!L"R]U 6n RO,I=f&]vC:LZ!s OKp6aӜZk"%"RC]t"ir/.`aU4E$Aېn`~DP.ag%6BZ!^7!5h/?R~e@)ф$/C L7 U敡/@.@7m؁s"L¢dh BvncH`H1RB& ̜{Cjj^\6j0 fUTb5Bs; jjj$C,2Ө`v2jL<:!g=E& 5LAr)%PzBoa0 _k4b m_qxLQQϖL!d`e$ZvL*.d3mNɄ"lT5^ƒbd3ISL$Ɛn 2N!۲ ˪4 M)ˊD6u7֒j'`o_b4%06Y6QC`bCm!a)%m˕ L77(iҪݧA޽2{{-lSyB"kRqoj%" HJ`r -Se.`5(sl2q%" u$9FHlɅR I;S']͆~0jf6 -[Iz@!5{0`vavTnoQT'ԅ5Xq5S%!ְ;c&C9B|!HBRr67D* X)4rIa"0kT@IfRhd! .P$L7IɼTIʲ'ZLY$/`Mf !NI (CVO9/^i 22lZ `i@4!C$H%̜IHq =WO"Nդ2T lYɤ~!2HS|0I kPqga L CNU58h!1(lf˳Pj1;`tl3M pJZ k3KnPD4!95])7 Fa{ (5`d˅2PHC]12bazN57[j機.@,0$ڠ l3޾ BK" &,t^lK%[jfkɍ0 U@K.NE R Bj0&2KN !g-Tpdg5i.CEK*4Q&Ԙ9X\Ȳi磒{]2q7CFnCgڱ6CI7Prq7{54!1?"3aC"eL59nN23d@1$6mw̙rm6KKAT֠PP&mm̖Y7x54 R&a9`a{@dF\Ճ!Mf52&LuR=Iph.I8n$ÕL&Y-HeCUIYdf 4kizT WI17$ٜ2 sV?lۈǎMR%1odf5Ev-*ӳFbNc=cMꡗ^L(M7.M)4&772b5*S2Lgg b7-"=dsy8ݨB8Bd/lfuՑ4f+7Sqbѳ=W5XpӀIت&Vm5h;fؒ:u2|]aĴ5/eOF1 "~nfq~|Zr,.f Mj{f.bendܺ(΁!=.5C; ~zm.hoGb8gnwx56Xl5+6[Zٮkݫ5R˻AmշPMgY}#oJQDQs:>W]<N|>_:k^LH>LyCK[yX%\gNYcdz>^C&dMQɷ[*[EM6>+, }M&6 @ĬZGG3 eOlD8c^1ғ;YI_qoja᧭f |6-Z& l;jl[ knׁEI$Gf@̵x=}Hescc]fbfٖ՝p Qf,P?Q#b*3hKPpinv_Q6'>EiŶ1M@ϬϕGmʳrF<箖=,Gm#A#_3'Go_9=l0נt9@}^#/R h/[H X8  ə5Sa&4Rdpfaɩb.6}tԬc8Ǡl{\·G}ūSqmQ5*ݩA;g6y@!3C׺c nߞ q<|6>L?GxߗplߦcҒ"DSK1'k&Mh uFs`gMƤ ,"q(̅T;!,Zy!2O14.@-΀'| LF%xMb[$gB6l+|i Eߴ` vT2Grs ː)9 5MP@p,#5$gZs<(#!OTRPBu/X(ߝd*S +(egAklZ0Y)$+ͲC$_9܂f&HHflj8(Fs14 *mR,(R\ŴeFf # $H*adAVH)# c\X&"G(Qe95D!`f<4KuI_ \B Pdg PnCdX"!6rZӠ!ywWZ!XI]fr .h($y9uPűέ2dm|aC)"1'<b!u"biF,ol0a0uvMQ`d p3Es9fJSTQF-t@XKeHֳ&B`R';%ՠϢ&C1HD2iVSʃ 6g@)JKxԓ q̣NEH !$Q dqU+b$X ж %qsBjx@BX$Ҕ"`-ȜJXTl_˭!>D p<%«stz2Φ(Q6a6UaYT&أ2(Gn=u﹜Ǖ\s] γ.# uH D dLL8f!mxƣRȃ(c~{NΟ^}4=ږqvH1P,f"Ez%șVM-?,ߣo^y-~Y}sl"sJXeJW\µ(Dgb-e)$uԶev6[kFlfvg:]q^6Vmρ۳tLٍ^rk;"l:OO=׿Yh<DgY&s}-5Wz}VxW^</W>KmOa]M5c4Dۜ^PTzӵ9ov'ot:}eup~畗׎2yƶ4F+w6NA2σw[&xgsmw|48mL2zVkn\jULoͻ0-C~ :ŮnH%S\PBt+A Eр<5dSi"\O#9ݠh[l,#X,Tp // 9fS8;'j֏i3R-w7 [BHzAFFCOQ=M7$4FmЙNًEꘀ(,H͒gyj4H-njo#Sx$ُn:r8\Z"0 $V CEJ ʛmwvS8x*FBGedzo5Gao g{%I8>PIU*,CQG͡ WU!4[`B6 -F.ҕre!\*bjH*Hul$/"bC3+Yń$T6)~h3+-AGzsbTeZ)i 4؃ʳP!ci]ؤW-H[7d2Gs^XV;*]M^\'h/I,ogI2\Ny0P,U"Ђ27:,Ne9%dxJ 1K]L Ӊa<;"3P̙,2 &ha1Tvc%ƄZʲrЅ#0dOOFspH̡`0DJ65#"$H $(Z7 < a8[D6 Og-BM#rųDx03D^gieQpGY *(9\搞("XayT;DҴ$`,AAA2G !GJ0 WM_|+IRѩXRRHIyۇۜǷN]TĽ0Tu} WPە+(r5M{<^.DLM~PLPɕ3'1"/gNLaxi/lRV\TMP).PֻzqY.wyvFkO $Pz"$.EʹAWxyxug~8"tetj3t<,( , B̈4FA+=]`V'!yBEL}x"0nn$`ŷ:ڛ/0_I }9SPHD20J+@ƉmngJEN ASʅ\kgԪ/ ^ >Ph52 UgDRQ#rpW 5H@Jؙ{H80oK4G Y8s[?Ξ̇0x NJ!qqY$$<9 3CwtV^M-&EDe]`./QDv鐴EQb˥mYfBxJWP}zl fhUNk^ij]Ps!ugbN~[gu*y<ؿo|l>%:M8hy%€ϼ0&Xu>GCC!qњ=q();q1qO|>?(LZ {̧KZw\N!0JХ4lBYqCg5j50&@hCGruVil7 l/DN '%gYPBt9,<ӯ|(I{!qgdlΏPZA'v1qKxߍS9swDũ_8CBh,Go~ͥ |F2W_- twv ("C=b(o-qaW"h8F)?>EdM$syC691 puO ?R ;pT`j,Ѐjs"!^ !>MzSz3̉s`PXoV?bQWEN}tcoND|t3rˋ;8ߢj׋7 't%"P+0pjbr i hɘalTPmE0`7".M"k>:U=B/\>J' ,%HR [S PVbT8?F!0M Vp&  cd㐺 0A@#E'g  Q1;1 5 Pš͛m@P|@  }Fޥtlit͍n_q{" ݊S~rM왤:x0 %揾E$_!v Ap"L 5L4]e ۤ6 ?tdIqF=V=$xhoXL}`mAAc9bQp_y0 Z,Z  P(p=ʀRwřկh3aBƺޜP'wwbңąw`"8!6^4ix`cu 1/cDy W+[Д5 7c` BgY*L}Bp~ ڜ'&ջn QZFH4Bߕ]#izot! t{F5V_6(C֭ݾ-!EdCEP!C0̲|-AK߫0 \i].jֱ c]KPC/(uu:M-'O"Pdn&b%  FL9@5Hzvs$l:EVdI u^S(fCC(} #3($@ vȤk;賿Gk(#'dC #ЊJϢCZʞIqXXý!աNa֜h xo_r6,.V Z*lG -3Vt kܣۙE]MlRpDP=7fv΀3? `/%=U|PFbU cO;šIHm"f< 0vN'p@4}G<ӽ{yp  Qe+"39ee<#jUaIY+@Ͽe46g9;(E0;˴: 5w!Y+ԫ~܋8$d B)‚ ||(ΧIN4Q9 !sUv<JY.m2&'@vB]ffb![F1V/?_zw}Wߥ(5sHB[6pIC!tk&$h68ISj'754)'#ۧHuk.XII4J^2>λpU~|@ +<#N61ײ8Fb 0J烢q&jiސ }Bl<"M6kf##;s"ABR8WDBwy񡀂zۗ]9yGsKЍmGՙEP = 7REޞ[3?$g ԣĸO8FW Yqmoۭ^~N9]3u3{'mM1ZЌY9\dHßI`[\A ,Q@ LX QF+X[~C$(:k3wԠ$ۉ>LHcz;-ퟕv?&kU&_h!0i0,xEs?1~$?H T֚QIW$$&#KͶQ#Ib"@,(J| ^rԅGhX$3R8 a&#;4,n M/5.:{=۾y|] i[7~%A['u7^sx%"ƜiF*qOH߹ߩc"wҘyI9̧veStZ0^*X*x?S崿oo9UfQ!u.= PJ)G !"ɒ@-@ڃy䙧kH1'"/!b36ygl|h* T=R9>39I9"@" R QtcQy7|n|Mysv6Bw˿sn&ɨ:2x,J1+dk  f9L"kx }˂ aRbB,`=o\Wr}:?:p~j~o'y}~AC` -[,vhPtlvB?': J qA#0h@ <5" " 9DDDy?[zttbQx_K,ÓZpuu3_5N?~8ĥeVͭbB bҟWlRC μҽ͉j$#6k5 QUZ֪,Pc .psmo)~H4`yɢ Hٶcc@8Z)RdTJQi;{_/u}ۤ02_[/+82-}f7r|%7U~꿢EpF/KOk;γ5{P16 '.fITk|o= OtZgdX`RH@V48O"L M q.dC(a% B`F$XI]_o`pS[~BW_Gl֊M C $},G@2xBr đ|BLBB̫Q8O&xOPFOZ,.I ԟc?s=5$ylh/ 2/hc L]D  tC8$Ff5ъ'I*%;q z @a%/Iԟ9!z9EL=&UnH K)O%ѐ E V@0 c, əuJ>Q 0ئyp"e? Y! Xkn{,:b3?ة&gW( ykΒ{>j-(Y!ve{WaY! A ŭ1a@5!VvNd(Ơ?3(p!H&Y^ r \ 1v؅xkߝÏM?wB`Ā8N #–~APU}]VÑ"x:^ #BRxt"O~< %"XLuԃBs3wsR2:1QI"1Q,jD$. 8~~?E _G\aM2zN2DzF,OXG?Ӝjqejp2XTv5fRj@iUoFm CBAM S~:ʁܻ^eoK%y-rAQK.1/(<}kUng MqS_6?\e,S0r!.(N2ʵ?)27,^ ~[|vkk{w/3ܮڹGrE:YWT,b;-v>Te'>nFj8۬?_sJh9xO ]\OfztӟO򊜛I-w<7A٣hSRR,)Eq; =귄\Rh'~Vw~>Y?$i&|QOֶ4v1zF#j ,5zV{/lѓ0μԸ+3u} ='{͡-ŠE񳺫 &'M.;e4ƍy^%S{\>Qun!hۮ6jj6|X|hI#$8&ū?l2xNI{L%E[hByZwo,s^P|{HW>f˽yoy!EeXarȾGѯv3]PuN`Cսݧ~\q@7n+B;FSjӆkq(G'r94 %JQk"蔋tO<>Bd*B P C(qzpd88^= Ʋ-IZB79. ַZ}C!=+HϏPtN9dq>ɔд@R(ő A~R$vc{oe;6uξ/Cį~~?9׏US}!hPN,cU2I2#XĘN[ pISx^ "oS$$H!Y-  A~Ђ%)2O2!##a%{? $D=-5&uC}ogN y;h "Kؔ2·-$ijЫr&Թ @HkI)+W C. ):|"Pc &T}BJ,fxHB,.K͂K|)H`p\ CȘ4jC  ;RRoT8O6yjOڸo!}u|vJNw^8RI @AD8]lm PÓxZe4}]SG)?Ɉݫ1ob2YmTc=nVbD @+c)<>aeq{(.eRKFP z0YBPh=FgvH]X3t6?’h@;[s11eF8>[@ @y(xgQM47t]Ӹݺ9.R2;׍gM/9K1c!L H4V;6Дet5/~p/r)VX@.{!ōSe;G3W9nvW/`2$ t\LyݫgCٔ<7|0\1  K1'"d<w1~x{-B5b@~`0jbnW<NdřŴiLh4!`Zk$4aqoN/pĩ%]B aˊI-RuV@9kM @Y Φhа3HQq),B j‡ Bny, 3@-jr^yUg]<ŸʴUy>\jӫ(i*4g;Fq {{;h!W=!(;&2YXr8yX C ;(vE A*Z YZ1 1*WŘɥB@E@4;ߵ>j7 5%_@3|xx̽1R=nSYKdpajy߯iЃbI/#+(3 ,E4<}-|IsB"crkO<+Q>+1*Vv>:HhTnsI>Z$J  ;hd4g.Tـ(NY4 4:\43^l^|3=㎭Y m'-y8@! W~qNC#Pq*Ը3=OoN5=@.cjՈ%?~3CW.~МapW i0y%Ҷ!w31Xep}!qAMо**P0WP]Ev #P= "PY6N|e`;;R+`^1y5 /PG⤒JDžQ[4 AcD_;{6\I)ԏZ@cWne{arVRI\Pam)pg~ A0b!<'ߓyi*)BK2:>!APaWWP p6 jfޕODluK5' ؃fᑪYg*ݞf+uR*>"')\Uk@t@y%;p^ b9I( 㻾4ye zf0 \/*H'Lq"9'?;*p;RXF5DzXz f ; @Vڞ@qA_p.8_4X<7g_a2[Yl,B[~j?R3vUﻏMG0ф_Jx2GDpD\ǰQ!γɮ4go.?rjh5ׯH4~=3\!SՆ{/o eD^%בmXda aJ"QMG ,b=T}K7ȡ:\.{>I戆ΎʓbwCz*]R;n}Dw>,z"P=,Y*4&뜋601_\c¦ 6:Qu44oCtw1.H](tm,> EGqZЄBxC!X4ݳ?(iδՒ0./ezͯ&=" I5(ScC0^ J#`dt sx7@R#Zk+nصm&OBi$[fIlL6s쯨i  [lt lMy{_DFIb̼xLԹBmS*ƣMZp6Q}.3mԚ^a.Cd2u3Tݩ̈́mA_ޞ8ZQO`xb2.M+ZJB,I(5q!gYĄn 2ȵ< Vfo7./lI(W!X{=[gUr@+ 3.ó1?'q@ǘ,s{}b;"U {ֱ{kN#f pBCv+XO?7x#Y;fM4(-nlv W#G] q..Voku71R:+]mR{GdPhT [~#kj?OiZiU/Ƴ+ޤם/0iU1QQ|]I4OUT#5Jɉ:bQBdBPTpIBuhοh& 2; 9qXap(P!.bŔ+|o/AZyCɿV`'$ܘU/%v" vIv1A$Ż@ƞ^WZEŒ6{RɣeY~[z] I$[Bo$j 8 %|arU/tl4 iO[%a'I$v-JGax=Um,i܂( <ڻf vGt_z =N'5q`ٷM] `ĕ_ U$";F"(R{vN4&qpP@j~H>AU{~ܨ7~9JWQVT}O!TTsOgrl!H 9"IC~ gӸ8n`\o@2&|JeYӤsX+rikKK`>Pm5$ T)˴$yLt}ksp64d\ۊB;`/8uPxpdGx=ATǰRoԭ7_Y[mF!""@  )DY/ߗ1N-)Exfnƭ2PI%FV1 X!*!\ QG#dP Pa<bڎ&1gR${c M6\]2/'8Hy~JyhQUܑ?u[㠽̧,U= zk$s͜Oح cO ѭ /jW 'ڏVPX =#(>c aA}$)|#՞dFwttC@&u!b$G)UG%r:=9Eg|]֮ ÂT)˲<4Ii(eIUftxf_=g_33W <]Ҫ1 a,QYj|Lؒ]@(QP!\4<@mx<,rfJ'ˏ.FYHvev4cuqvs3JAS(Fj Ep(~B<e;Z٠Nʼn>R5yV^;9$՝YFۻo EԪr@c`[ ?SU04P4 l I6"CV!h VVש:5T'mGĻ7 YyWͶDMY)ޞFE6)Aj2BIP 2Ra<>~* K fa2bO3X@t* -31-ϘFNu0&q`#aEBж=QYl̠dβe6@?"KJ!|Uh5[.V*& 7M! <Ý1J3&4ϼCcΗ|=D2j$|&.2%>npjRvvIml7!TۆzuRsbxϻEa!?^>HB;U6foxL9i7f#֩_x,à 96s+4@+@$\@S5o{$E!+z9UkƼ-wmsIcoEQ].'o;מUQԺ3p.*% m :>k\"+2Zs&6_֪JIYG d.J>*]j) ~er_!r &ZKuicԴ%=RuJm]J"ŌEC A^`HӤɾ%VHM|xl{^|SVAykx RAM,E@ -j \ P%jpT2!:S CBIAT+"Sؽcoc^=9+IfLMbkj:orͮ[Ŵsx4^o[ߕf_j}`K0 Y$^+ł܋^moFo\RmsHdd'HHE 0d"ad/I32rUP I0BId$6 E$+ !46H@ӨR0 HIQI0`$mH@!$0a! ba%$ f%$ ) vB BI12KY!.gPdƒӢ^I2I$&@ds$/aj-ZUljkZ,Y q̲H,% I{6^/B%$rɒ \BkdI 12B@,!)HI s N &,j&1[2&^@hKi8I@M(@) I!rLL)6D!3#$ 4$ &`|B聍 I3&o_sv$߆+s #ò,Hu8"Sd?64ZUBT.A0j $N̪RUUMp)BpJ:E'F^Sl.O7!q'ü"&lLLJ#h\ Nb;wbDtv;\ww.w]u]sӜi l`j`XHu%-fWHIN:),X22d&̗w.ILs.rHAh6t(DAY)DBDJA`Ș@ ,.QFD(ȩ!TeH2 *¤ u *dYiR:STʎUaR1 `BH!@Ȑ8%(ZYP,\8:^x7>G+2|}e/B%z N LAsSCgaO{0;g&q2gr훅(u GxՕkT/quJ }G5(uU/?}6||?5-X]2y*v.QgyxtʮkQGi+fG8-Yh/nfR#}^B,Ð/-q tto%fszVJ7Fxw~W'jNi5ip?WknvFB;pjf(4wJOZST(='P6>/E":^84n3P~;֞kNƓy ()lt&< E ""#BH˼Td3Ť5':k y 3enzҎzsKk:I= J:}O@KJʧzժh]$rrs@1(ȢNjT;_{j~Pncc^uػn'պ7w":uʧi\ 6 &3P)\2Nvӆb,Ž̳ZexK@Ft>s, Ϋ,wszN*= 0u2ߎǏ<ZZ";q5Kmɫ'!2nl+8l):m%t/M"mh?nZ(8Jm7]kYO3$\_miw;^e"*'{f^ZR鲱 K]^vj=ߕL/X|tM~mΝjӚR:.!&`EtGQ'v$Ý׋e.<(ZhkKwEyQo08U޾uARck&n~γ>GRL'ܼr'!ι[j9ilE|swK]wWo>SYle+yGN/f&>ܺ1zǧ+cd?cُ]z=w(Y0ލ4Z'#I? !9 CcxWŸjt}2`4o*e7 Rv:$tWu|Dv֋z?չXmk/woJeOreg!<Җ[ҕ9K4wQ̯ D)N.^Pq&YjfPgo;$M+cn4MU5#=ss#~Qٝ$+oO+iݾ1pc*X7{:vC?ۍJ*;uX.>rhϮ{5K>۳UVEm SJc~pv< }f;k0o?p3?v֏ 'Sp>]Lt/\g󢕦<+2V~v8\'_S#;0b?nzJ"#;a4j~ܽw&mwsJx5]FU7q-W1E{:U-E Y$oYm?"p>QEe2~ Z-g 2'b6eqc$$W\}_!j._:w,r^w]V: ]jlĥi27U2t>Vxw&-exՠ{x[ݴ#GQI3%\Xů!7Q T@H!Iv"+\]]y <IH!6i4daJHL&ii-V$x@pH<1tS %)dd!.Pbjb;!Qcb 1C% KswD7Gt; +stݺ./a\xK )`" Q!- A$҅i@FL%P#U (JL)EԒAX&:A H) ,R$$)(a(":P ('R0V *ۀ$p0 "I< BeYHS 1U#Y"׍}-Ƣ(bcKٻ)sas\w;ww]8t\uuww9ttwus9;]˃\:EPk骐?d #m>NB(?1H$Ȕ 8B ;]U&Zh(JETɪ73[C7pvMƞ6ͷVmq 1GgKn26ֳ\^AqU[7 <("P>t! {B7ODRXIޚtr3gM! QM!+1S0]_`N_df5jn #v_R%s+P{4_Sp6H4QLցflvT-fQ-cgû}eYK%(rD#7|̡XIS>{{oG<#qx7V ͳ 5?#>oݬ8,zf 1T1ja@]?8ꡃnEa3IKʘ "85x9rXp6H Q z&`ݽYY켻֎ 8|/N+ܝRK'?4*~Y>ȋB . R' b%WzףDt Pl @w&RngS5b@SUNej8ӽR$՟n8FN+z&:q.vvD|7T Rl@&Ě'2]gNlQA t<^d>+hDy(J+I>t sjl %_CzGܙ7]SV,?S QAH^>t<:,_*{jݏ6ZsFQvHZ_;8bTHcnՉ"C!я.~W&¥$\u2<1HM"#8Xmypbyj_J=ؓ:0)GY *Sizd_JrGcԷ:pAMf38b߄ న -ΕS B?kr MOg o(%-,o,tOj W*!{P^CV0v0#|6B_̶F8eT tÀF TVMU~8cߝ-5[oN|׸:'5L>fP$2,(꤫-+i DC'Pou;;o j6dzI>O ߤ9}'`)KEbkȩ>P@~n!'>#A:P@mUsۛVBksS6Cl?sXǐn#ʔդMg 9Bar[n0?h]7D@hi2p9|nX"Q7&h\B@O "ߍ uAdūܤge>fU z.%ܾx}C+MPު!4׾- ;FEJ!ln+nrHeC('(1|̎hiUXQ)J^y뼼h&yXZUJDְD ρ?Z{~ߡ~g|O FU:MH?R*Ҩ3Y&9T~{uoԜؠd< O17\`S-,T0SJJ2KO3SYJqjVV(>*c=])J@sXȥ_szt\G׺-Paa mK9Ry 8,5SӘxӞ3 ξp'cr :.p0~(&?AW~+~EU.Ix2Gޛ;> 5ٳ r< Y6>x=D,g;nw{vl2gϓfn^'yIw[ \$gr FJr>ݎN_d=LBg!́YRnh$}q Դ g:uNeH|)"o|>x1<yψ~Nuyt,m(m8w%JA|_MCD2[soz뚐8eq :rPw^IY$g)cQJI>'(x~*6T_$!6\wvll͡(L $lMW8L(.m)h?{Kjt ժt泟2Z5 ;z&Hpـt W 8 blڦzPkOryF Auz(w0 0c-12G$=P\)RlKH /$Of@%4@8 avLIl舨Phi +0pPZ%Y O<ںp?"z@xj4^oAw3O/mE\e';j>(.f@TL1@7!z2K @ pH00V&c$k/͕BM}58<CPqt(U Hx#p$eU3X}CNw%ZHli[g!m Bl)h}+ԼQfv=Z$?8S񨄔p;C=BXzX=C vOoK(@{Aβntw0DA&$H b!xETuw=w, |4P5#GbVOkR3?vȳ " i1?0x=aaeCb1 V^Ƭ,ٷZC 1UDb4i"aQ8/}̶b[OxI$js F՞xcBU|C{0EC!_(|e';PɧuY<= 4Xx8^٥}FEB*)gv[y<bhӍ[(1{یrKi|$#>Kt~b+!{sa'I'ʾ!H-%#~ `n|IS>;>3e E^/5ui;GzBlOIdK'ĻCk!cкj8?2|E>ECʯcY?6ia 2(T!'[_"Vn]PS}q Bc$5L17Px4] MQfsI+25{kIkCi>|d>wCuG7)Rr.RRIR)sVP*j $ڔϓFڭu|'o+>K ӈ\/JmwNp\vx[X-iꯍ V98)Awy98{{x a@[@"P$ՏdXG8=b-~b(=TyMZ8:邮w8v(I_W7ĎSD. !t\YPTZbGx0y^ k\NGHJZJQQiQj]o]|M !ȉӫF1Ч)Ą$I? ْ9(Mw> cCYO yZܷ85;(3Kט;EVSJ2)+mQx f%͈,m %555jf/ ;6?}}WЄMIXZ;'K>Whuwzoq_tM(I -m,ڹL6-ܤ蓘!GgK3=DWq۹! 1|"8}\8WB˷>#,1k"D(uLh=K$':m:>򨷘_DZgiC\#?.%+dG@,Cp?KqtGAl$/~ag8Ew0we&$nuva jP+>qvC4C9+Ǧ\\L3 :#RmC֪]9e@ Dvw|I`AÛMV0#{qϪ}uz=J>]pluV2QG$keB(y{9lPF{ j~ a8/Yq>Éwe\$O,2bl4 ͉ B?KpPQJ,&-CĖdaPvCy0Ni>}nt"#.⋓ف\CIdX`P36(foDZnu?v艹>q(MSƄPT/aP ^JJ@Y4~%+_q1B=%q.AtI]}.B'@@Jt\(_̾ W3ASsF" GBMÐ )(mS3=Reo/x Be}Z^SzM4t7qʹ ϟ`6)uy3{'8mn'>JP;9055p r?]J>کu:?qrݤ(iGydw:n=.Աrq  =Z<1ð0f~uӀ׾ ]iN+)I6?VD.m2BNv?4O)'He:vqB[; .#Ia6g\"_wa۲aj^}{h}}ݯgYw?R&6`q|4uWLq*;->:i|߷sǦHpŢrFOYՏ,owqS OyG̵־H+Qt\ǿwmd>eEzձp['AşGS[G +.OIwܹoC,}pv\>#4ŵG[b2mnCO mS"2KvDҰ?o>M6:Xln} Ryт犣u@tWיEKW+//=_%.?z j=] l7w<\ |Gl]\C=0M-|ǐLALu'?RaSZ9 }i(m 304fim إ lFRpϿnV'4t!C{0|1&<>VLgXHH[N/_*E7ET}Dx#C~=w %ƋBڡemh5xGZi(bŶpg ǏoI[^`U b!7p.mzzOއ!v=ۆz-9 xL)D ȎPނH^A.s%͌Yb?~?`}wͯw:{:Bm3?(wC$ȷe b9@z<~(KbmLR5|B@#Cn+f>JN1qǜdj;slt! e ъ(OlHk_ߗ[S^k=;`r@3ΗP{OTںKR?Jq֏NMNk 01ۛ`#Dri G( Jz׋ I5@又IaޝՈRp OɁApexڌ M #h+LE00:Cra/K$%ZVcYJ(=.=${N{G8HX2 jH(9 7:<.1LRͺUxNKY::a DMWͩݾef PTi8d3 $ wcyўcBtRdD_ͨ)霑I7%CFNiNgCS|aT b[:Ч&H%06 ~ 8 ?b9.Ǻ`?8Ġj,4(?#6Hgˉ J$'WW}j)=F<\>W=_~[Z%%8s ߩ$G0OyzJbVDȿb4=/JƑbT\fl[o_|]H+e!HB!" d T?MЧ DDK+;m3f 3rl(lQ*#;F4"HEC&K|җ-F+I!$2ƟaFS'zivs^l*GIq5f1M|HPJo٫%5ԒI=9"TƔp'HP|L˴9:~IR7} 2Ձ!1/=+ӝF@I)9*AFZcxesN}m %œKoK%7 `O 1[<|pO q*# ўԲ%H{+%2X5d腑4W$35-fٲʹH5i{Eoe‹mhXXk9Z]sрK]oz<`ťK?M&qڽZp~}}r'S@90 Վk5yucWD mBSŠǭ6Awβs ⽘MMFP.&հn&m qjf@~1)S%ᯚڜFQ|A8z ! ȋ!kaigߞ.ϣi*€I ucrc/<> cؓp9Fي,>Zjbğ[6?|8/__75xMɊPcmZw =ya>kx6YD1hO!.VT؞,A`1 B O72ܥ4a, %12dw-2NU|KB:3L~rHt.❂FR Ic?qNjf'>gznbpϗ\Y{G?o" U"N l"FVTid%bJg HSq,Z&?Z4I4uTÔ/_QN Ԯd7|HRAJO-ee*xڥJj=x`|hA P _{99Sbk4^eIm)IgΌTZ>oSGJx@W$bpZ-6ͥz>Sz4X C_PY$\f+0tCV |1zJS[uS c4[ kW,{LRQ~{"6%voPI d2sh 0$ g`h{Dk1k.DJtœSGca@Q F buek1͒y#ņL3I_3%7YKdL㖐3ȵN(<4CR# 8Af*]ƊVB;0/[J_@;=h% Zp GΩCŀcz8_lC 'ԑ(L% NbsE.kϜnXױg;mY1E dͰ2F8n[& (s sSmco{U*T]b P;gF hc Xͣ^9T#׹/Pr]Ϋu:M3>JOz+@R>-E8/II c ̜Ѫ/W V1He_ $XW({6f@o$?ҫ}3@$_.XN6}NÀɈ tAkThV_R={L"YxKE҉6Sy_~)}ƛ5W4dKɎ{l@=Bc /_XPݔ5&v\=W/cK~(HbOӇVyiD\Cޘ@{ u Xf3u<1Mo(wB$}K$!]k7 CSgMst 68k:5VFrf_hIXpG.~.:*a3wB[\ߣD(i;f{>,Sb@[E1N~8v5.Fޜ3W_IP gL4(Iuj"}yb+w@Oe!N[ ( ?RFs]Rӭx74@ɨ~磒3YC=T=L^$È6znMΏǪ$p)J`6PrL7u_dur$i ͩl1xL;1Yi ,3$h_tWhY昹!]5`Bz?iyk&ls;rT|NzJq継|c3h<Ϳj~'o6}3eX\+mmD!ٯ1)8{f3{?gL8 :Mފ۲yuT,R8)Z-] ƛ&q! $&j/#qP:4I! web9"[6_ɉX0&odpCK^8EB[ɱ00Ďvj}%hoM )]qJt֗OeD-pSI1A(cj-L@`% jYd xeko<ٵlZi[txn g85 E_G3:sau5@8pzv`#78{;U>&0>d!w %-ЄHC !n2{Ha ( bO8`L h̑!&>a ;vu))L,_Æzb$? &s}ª!$g(N8ҒXdz)=t?34hj%_y7RPO#a>ɶqIѹќP?z.hl'؏hm^*h8A+U)H@W]MM1kG;z1^x^ЙXA>V ="Sr_Po1% p keؗ'N\7⃔og)كt^ͺjMʒB_%mq4>M))t;R4%1Bа%l|A x0 ދԾ OB 5pK0)D3ڎ.5 H-0k@]W9QZj`oc"ccڊXRǒ + [Ȍt(^A@=Mh[i/-{ѧoْصs缡&$Ԁ( v<C=$t8 x'U./3:-x.0滙R lB?u8wޚ6_,h|D 7YIfVyk/v0;<Y_Mk,xCE#NLV}O<^+"|0IN6} PJz!V@ PO B,J-Z)q8aBx< ï"L^GC{?aH`Fd 0ǁHLQe<>{xA:BHJm@݌GX49L”{=oY}ܪP3x$c: " 6"wkTl9C[휴3䂧7ʆoXw9(+3F.e've%L{I!SĜ1BL!ۺQb ">7Y*QZObGS8HS]KQ(NW[Gfxti|> 7=JV^ZT=ҞݯWA~ex~NAt5exxs<н`"=xʖ48[?%yM C{X-.Nnv`S U&rMv* z݂QI^~hs\E|)W-4GQrDF]vuR (dƇcok)O~Hx F^@ÊE<ȱ3;^|k&uZ} k AӨ>#M44 U(1 ^}JBx ǚpd"  ?>x6FOSP5O}YSs7,}bpu7V˥5~FI~%": VY30G=2+fH_~!0'#ğ\' K1 mJpWߞgC3z /Pfߵ!D z{A8|3@̺$dE"/)(CN}ᕾ1Wz v^l:䔃-^17.ыg9}#WQLCk0$ic odp*W`pio: $UtWr K~qꖭvG Y{cr}FH/|҅E} ΛP Ŵ1)t[r8j:umo=ݱlf1ЋdujU+3{t a0 z.͚[۝g?RV.SDcK&"yiyoUi)NFiU'D[2yu,o}˛k;i!-:hPhˇpf $kBj.C;qcڌ>Ijbt*T^'0sl1X 7nrɫr:_|s_bQ>lv ~䔑ҚEaaP8ftfAwۑAu}Qy~@yKW758ĭ"-#Wc<8:ώ\FSP 92i5}] os^aU@Lhy5\c+dNI/(HO6 Hɮ4eܸf2imr kd__;/iOygg7}9xlޱ} ߢܝK(w'`ikknt[XIQ>Ue毒rK;eZ%A֖l%ѽSO1AASϼ<ש>g9a2zv5_ԖE<>aJW1L0zLk6q-m?rrI[@z>K5=w7W Ad ^}[5Z|3ʉ35+{z"$p'2(UWV~xyXY"IAT~rO4<_!d1_k9IGQ PRx%+̐{VDsycF9$%MW)9!>4xE~kܢ#(F$́@ >%w.hGᕼs)e@$#XC$ojgzvJjTS5=0E ^)~̲J9TȮ{f-Xzg[iѷ =Tfq8D 6!#xDt|Ib!f;D~xgZ\m(%%?0i3AEAL޴$ueKt(x#ږsk9dn 1)| ז&v>CRVhcL B' n;|]^#8$I(jD#ŘHӗ*ipb|(嫍w-^vo9I=I!\9tEu; AH˄1mk_kÛ1i xlOsnKÃ[H{OgkMfжDEDO6=?k~`Z}ݭ3 ܽ?a*aQD+([)oR6(7%d,Vf>.mħȝI+i5:/o~:r:/~9]~H,Z*Z!krq0nK1˪;8cÊ&QEL}ak(ug" P$)'2/WOa  B4C DF`vL ܢSyFr ~$U =9%J"L.Ntu 5"%5s]Bx D PVۇ1D|lvg-[7 r{î[}89+e ;|繏ʀ]ɸǃ3]qA.:uPF ⌓$@JS1>HTGYӺ8tRJO]5A:T[Rէzdz񬽦9(HI b}F9 ŸeB \AgAobw~#h I;UޤI{ 3篍 re' 1C2Ô,;V>ۋ.zkzGmU%Ֆk-Ƚ~alxJgQ<ѵW`/noƲe1ii U-f}GVfv憤\I]/qsxi6DZX$"|vYs:.V!J9A#OZAL[0S)2;nS+G$|]5bXkI/C?(vvδiNV͝gA$i- 2D)N=5:ISr@f|h@UϜ= S(X [̯ô :w3(0uwZ՝_-qzN0u|fY:I u[{iT^# jpeJ U'@*'_u?ނR `ІJ I`bp@$II`EKFY4Ow? WT? Tz#Be;xŐ  hX?(7h|TyޢxBk mELO+E&N#h?YR@ Z;&F@l~ & ymtTx08D9!ga7S8YHVͦXűYc'z|n/#_AaOޖ ;nd#~T42; şAUrNa gt8>#R.?cqJl%A*= b~r@v+ #0\AhYmoӊ%IjK]S꾑@'<T9䤝 CblΆI&k׼V sߍ-5|=d;qfz ->l28k>~cس5{d 0 DL9yhS=)٪ˁ!#2 ~!' bi F[AUq ذRq=UIne*[o1 ,^C|Ja[t(x7;V(orW?ژ!ם;]DC+{o >3giz$%E x4%N le~Zhuxc߬gy?nw0&UF(I+7"߽3DG ,e kgiFE~Ҷ fz\Â.gۄj3di&͒7+cǥKu){.[;.MpDή}9N^gwШ3L?果cBůw߳j[J֋͆`ڢ)AKa* Z!\lSK֣+\@W<s@'0V'X0qV ܓ!^s1lǃM>oqѱ.A*eŗvr700=]""ycFW`vԑW ]?f۶GQW)g@¶WHW#ʹ-4i3yWcI|(t f'lUrYv=32C$dn*c TxNK+y:PJ dK钇E>/{`C DFp# #ӧMteNŘ1Fefvu=ʟ2ӻbBSZFvYl܀wWO6(yˇq5toPW9>y1~:fۚP7pAn:؛4Qlk 9Qs6$FrT\&Ӽ w/n&QB@ `uv.S7;t75JT:{#+i}<]"}c |ut]Oެ +Zr|c.{ ha+d SsOd't{24YDD@1&"&"~3X;4C:qz_evo٘H[cD_|:~o]:9dD`ydOue_*ԍX(nZH_ 9 9c#6K:B~=l  =b:Qk#vM{vye +4y>h;XJU0.4*K?o zx=芩k e ꏯ%NrQwBCuNS Z}ڙZ5K=z9-lC(;s3afp'WIjU~*2 ߴ!mLNV_1q8E26E|9V(?#Bo7ZT:OLbxdhxJ9 bA@累 LJ{YӡۆV1:va>!5/G]~==ܪFNxKqE qo=o9 DSbnMoܱu.1)6M,'6zBl ɱnQ"H-E ?Å-{Lzy?Σa)4G""z)%YOw?3Wz?ok%BRud,US IihS% [j(#u\hW*dCAdw"Lb]ri&g}iHӐgF#h}jhL,!caD4}F{T6$ Njcc|s-}){(qF`S6 DA4p0t5CߘξcLՠ"~|R&!Bf / wvmM RyoNEe;@1opG >a轙Yw$ ((8<@D= "x8X¨>F2ubDCeK`*T7) N)@==Ð>ESܦ/ p x0͟vBu{}pwRB,Vlshv#$0M>өRitQ@P8nur_9#/hKRc@@4XR CB䭢աKBU A]ힿOꂀU9%3fpQ C5Y-+ZF_Pٟ{Ѱ/p5Z9eq(iԄ}`?Bh9(x B%)z$)trfN~c{{6.Hk+r :VEQO%wءR#. ٻ"0A,*RlOT*u0_moSgfibCb" s3zeLlŃS즻: m)1`|P+)#vU+?_Ti>zد#iEɑ$//tM,򫋸 sEzh+cRfc nm%6;KZ\ww(7e1~x' J\7_Ю(M_sTߜisVTrvy2yX/䞐VɓZ .S^nmsEul?}~j W yRݕDpt0s͏ /r!˸; v"4ϨtHsS@D >׮HKX}&VR]r%tI,:&pf =q5ߌ/-KOʟU?L 9uc.*a< !eYo :+/ysh\tOՒ1ܖFRBFgYgT'ccR(C T/?G|=-q]HP$Q˯rJ!`fm(?"pY#XFLޭ! +ԩ <:z>՚=-gOX,aA 7:0_z^f;p$\%h1Lb$n.T#UY|3pHaNblVD4D>CF|R.pRk+ N}l}%$ȩssgH?,H#͠A`RlJ .z|#ii?ʤ[UE}>ߵy1[r0eׇAtGB3Nj~b46QnjDkZ#Gk\߭:-^>qo9~B>rn~Gkq냎-$Ho£c3ZgI+|[M~\:F"sKLM[ LjAZR螺ۄrʑx~IBycv!Ȣߏz LWK4 7ǧݾ, r]D07,>VWubN"'8??H-Fs~=:g7\zq<ݣW^CGF: 3hXDzsE.A?E0b=>OPԛm -|hSt}6[ t'ڶ;"Jq5vT ntNJEx*V:O/,WJ5[5ͯKaoCF9;W\-tZbiivJv1E'i3Z;A:]U!%|Ig@p튽UF􀍵 !.y0w>=r7㪟`՗<˜bm]{3t'1A`{g:zic3VY`INQcxstNyYe%&%李j$`{\~[?.fYe-;oɿ);=՞vGYdӺ/\ 3 >6揉_G/R_xU)zZDOz75 4YTg~vjNB廓l}9:{8)w=@e+xNߟ_Uݯf\kwfS"!t{_ſGnf=27)kͪ~3]ojW_;r'%eT$Ai.OXnrv~fya;Bfq啾̋,1?Mr׊H`>=csj鰹pgp܌}OEbrv)V t: F&fP" *7$nz$@:4Щ~o ㌅(볕0GNM44D-@4eL*; ٚa%p$?77c T,לEKIh2MRn$H(ooNj\?U/;BϹU AAkn:t|T7p"2%z9@j =/rP+,it.&3Ua>V9YS*˼gYCA/&Ns_1C5C'e=q;EDöo2תnQ o k" yҡ-K@=9"a-y;?3eahdXnR4dE DfFNDW$@mw()y o{W1wc"kM_ EcYahJ`): 2)@PeӎтF#$ aJa`Զ<'Njujm1Y4+yQ^= ɎIx KwQ+3zؑSixGEOcuv+}؍#4L1 cWO /E?-ҭs^#TS ##2 1++Ǚ_$]C!@V{C|oeO,^gD>Ejm ՙC~9# _@{v~9,}V.=;;1kb8Dhʣ)G4N0hJxK}zҝWN<".x1O#4ZToI1mw EjSuI*g^֖|%֐0й"6(7 ɓ֜k_ԞcB&+}jÙCȐv.?O>Hm@ۭ ± FUy`ӽ?*V13acZMBb40$P9緃e )=<:tE×QZK y3UY$ǤoO1QUBhPRɲNAH0Y'zR36#nOCnXd =m|J qZsسA:V9p;0F~4ӳZ0׎&fu*LA5>L HJ#e1Ǟ<іpF8hFrACåu[Ob-ٕd甐pc$Mv$=!9.R4<6pњ8hhoV$KЮFlCWXL[BdWܼTeZ\W`$NTYqBߍ5Lj `Jo0HƁFiۆEB%=Pw5uFe 80 t9)U wKuԨDsvȱ !)f%ؒܤ_O9p6%MNJC$A. K@x b,|Q{qMO/}mOo2 jvu ,YUgRcy/Qui/}\v>(~Ýĭ75vٓ د&BkV~.$|zpnOܤ%3#+NX@_s! F_ֱ%`2)`lW3t{[ep! c=Z"iP`s;"kx?FyVчa֨[_uj}Ϋ ~&s\O(,,!7-{\ɡ̙\aR?π@p\Cnglsk+I[U6G RL<8E0W ˿Ғ~p^ovQE,+T1a-Nf3#<n\^I@HPdxmvÏ|ROZ܎$] 4ЌJ cx?Q KNٟ1Cmp|5y?1`L7y%ŪGsYMM"uq`Ƶ0Gٔ|6YgHДl M o#}ChnA:֙L _9VoQV *fUhtk!S$+ qVIT?<Y/Q30`RD PN]~9 FyD0%ʃ*om57ԥ($-FM)''sخ7Ӓ>MJS&ˉ{wSq# -;HaF C>L8ݑAuq4}a|%wKpY@)ݸSg6cXwjZNYڹ[a`e\a\2#ؿR;a/S¯:oB`##!CEke#U-o( !#E ?m$&ǥ@:{bA@zQ.:t% $']6hL=TPZ6GJ6~x;[ 7>z!ؗw!PғpE'/glC_|C_={O\Gcٖ(J˜H[V"T>7vϊ{y?\=]y}w n$N7\<=#wN03HT/fZ,1SoxoI+Bz=z@]l~G 7v޹,^i0P$xG~UoJ߯y" ג>zahONTs}cG;KW (z~KY>Z8%) ;,PW=?=y-P ; s-B" {-qؑ6 0-2f HpE]lWA5R. F=]+ud9ۣBUX@^ftEc)Xs 6̱ߞ|I#4 6|gT,߁`pϞ8K=[~g6Abj7ޝw>#f7 ^yC{Ɖ; lLcfsWi_/3/\wv jTo:gax+ %xv(3vW!"iGI1Y eLWZ9][LE2SpF:^]xZRAf(Au. EptxgcOk!A"Α/̦oU-N 1m3MB1͘Y@)J ߒִxsoL}*0*I#7YEӑף85+魟q z9c޽Yu[<=<UVD`B :"{xcݕ[}Pe2p;/~9;^a4"9Fzgt^ӐMOoEH(`4~4C+Wn@ĺ 1͝2wzwlhÎ=8ͦǸ`.L_?e毊0AsQ09qDz TJƞ(o9Gu?*|M-OPAh2il:O#zw^$)v,f/1I`w;Kcؙ#M[L!6GP?|`2lП!UE B&}e$)ZidM¸7pwjCfh&ϫFf}T+SU}>@rzfT={JJ }xXVBݫ)!s e}xEO{vFZ5LfD\s_W<Z$~.HY֞Z٤M &%JoD\3REWVb-Z)?6?2F0@(2'{ GҶro7|eeV݅rZ/;cJNFÖء5~.TũPJ2՘F90)ߵPR$mXŖXTIuK Tm'[ zTvSG˷+N#[,\3Uԣ}7;Dߢqj; ><-40oĂh&1|3ֿ|b7-uOkЏ,P\hXNeYyo3!R!&zq0CNrrof. %ޘaH_4r{ k@Rje.ݣ~1 &ڽ:}'bM6Z;aշzvA`S 4D@PrKG7GgUADX)*U*SςauYlT`L0d-( &4aWG+P˶t13k]MD * [V JGpגQg9`e#tO&[#9Cuk./[rRI bj $9v(%fڴP~( drr HYv.hx=mHQ[˖H@@w"{T3Y:|LJ6i:TOn)`9g(J!kF`1c lv_)ʗe- UE$Z,uz!,>q. QMCȣPEOG0T-H7IeB]I$2b,H _:^W:t'ާ_u a+m@7P) y>|hAVXr_>EB-'CN631_  @@!c 〩qe#<7bqxZX.Y¥Ez^8+TQ@6I4/Rf^c=!cL {8Zaݙ`sC -RxQRf=i_. ~E/l]6' SYĩ8w-E -Oie$Vmui`P7%1UW#^G% ?B,?KJӞ|9oXA&j]" G]I]MpHP:J2W^傆ܲqmNGt-,!^%w<JK_we|P^J}I¦s/7_yz_h#pޙB γ_Gz:Bo~t7(CIHwwZjߪ[=@*`ۀqlj]u˹^胶&kp9 vYdGelNeGXsɥ%u(;ST%I7м/d  E<#92/nH@=NXOz=[,s'B [ּܠ; g~jf(ECLWpԔG8xbguzQkT1]ܔ@C>e2+ }U",aS! E1vV-Y"`ފGMM|I\/=M5&p@DH)&=8˧;~ּϣޫ 6˟@O[+7m?ױͮ}DF{$rD2|aY-_BurfoZv6ms !u+@O!iB|haDz<$DItr ]0zB;yuMC=k ܁H !UՓ\Z҈p} 0~v^ν{66꟡z-:J' 1g9mGuo}!QU 0Q4mE Xi>!Dv}_w9ۿ'{wӋ)7;#B:Fϵ%~BDZ/Wy{8eo[0ެB]+Č0#_0>Vkt&ns>#uNmʹI-N뺌]䅧 3b EرMgm B+>ɚAb&A"B[NêYa35=ۮ ]MNaVP]VU|EnZS"+\,#I?ھ)hic401K:݉FQ "+EXϹ]oo~{{'o}˻(F.h9E5%\YsRԺĭ<߶#uzۅ.RG)I ?SZ9e`5W}3L2(*Ʒ!s|ТbG 4Y;";[Pk^^K{\Pz|+=󾲇 I66"XHBN:}j!f#[F+{6YB@w=eis;i)Buը(xJ1!&>~nPO59D{e^"2Whxi-B%lnG9|v@Z Z{ǭi)=E( N1)l/٘U?*aDLe% O Uy7?k38ܛǼWKwӞsǓ`~y|xK";>G,14LJt_D;*B}@mq =ĢcquT;r$1|GA^@D{!&_7ON$b͹  Or^pCj P3CJ7@;hDBhcFLM E/$#d҂X"tSUU@Yk}Лӊ2jm>8@qZ{2T5ȇΟ/ fuD&DUOW=R:ϧ?c.}_MݳOwmi9X9^ =%0l{t bqk"`bd\9ݮhf5nyok"Ui%NNoh!~qG3AYX`d\NY%kcY3Uh]V{l` ?1v5 }?'_ C&FSQ,J 6$! H)$*I5D*Fɢ 6Dd"$XH<7.{ }+rH-3hVe"@".PJn&4]eKk\"m3?|/H"AcxCp@_Vsx1P #"*~o}зw>^* B7rֿOht|Ă9f7o"$KDb&3F%>y-SI9`Lqx=M+~??~}?Ğ$UH&hL6`1eB DE&)$Df# DH  -#ewl79] /v7`(cQdp!ciCQ5!DK,JcU5BЭT&u˹:GG QD I@VAoig}{ݶz{۽}7vセۻzۇJ{{Wtݹh^{3e}:j]}}own[.w;v}k޽ٺ}|w]黹ow|랽SyS[ݻzN]wS}۾׭;j=ooֽ7uήU/mݽe/.Ϯle{{o[ua=˼Gv׽{v>]wogݻ׺Nw}wg[M{^{=֥}ڗtsvwޛw:]{^{v^Iݵ{r^y}}ܸ]ܬ;}_nop;{}kvw dzfZNܞY==⎷gǷnw9ӷ;tY;kBm{u)kA^)سmOv˽fzk\ǻ*ZTw@mswYF۹zۻzδ=y{{<^v-.Too.zokv^շzvkkV{y7k]޵׽\nze{ۚVn[ƽدu=wMwU5o76.hۻ;^H7xݾ\]Mֽh٭wZ{NVn^vH޽ws+ͽo\x]}w{Ww;y׾wﯾ۷/g/}uz#w׾v>}n_7}m{7i;˻g# {wf|KjwΞ}ݾ{|'zmw.Ϸw\>}o{}zv}޷wZ޹Wc[W}^sp={g[]<;}ov,wuk[^/pozݻun޷'C{{}vx}{=]7O>{z{}^lw}_}ovuםkwkgnOnNngۯ{}O󷾺}^y_A_{{>owo{{\ۏgھݵz{5U{yg6^^ݮ<ww}ﯾ}=]ZSqݖ{^]ꫭǻ}{_c5-}=#wjϴyvvo}﮽[ޫu^tnׯގ{{Jw[=[}7.[M\ٯGv޽aޞޟI{{^|\gu:ݾ޵m^yz׹|ukۻܽݝ}OWS}Z3z_n{x4\]{oq|kw]w}}^o;}N]Y^囫J/]ם֫{\㷷|wW[ռ>}}w}[Mwlo]ӾZkn+[ywuz=Wa_s=۬Yﯾ>rz1wgj=zk[]Y{zm=>G-1{}y;wWwSro}ݼëGݺiޥo}NO.U*m3Ͷ۽wo[y[9^^uٽ١+i޹>{ۻںSid0FL&biId#&`LLPx@ LL&a02` ɉ11'F&F Od``T zhѦ&b2M0&Lh1PL !&0`di4ɠ`&& M4OɪS 4 ɑL24i 4M &L zdщFLL& 4&#ACL@biALM42h 4 @@h 4y< S &>G/x"6bҐS  q-=fӆȈ@g7 Cң$Y'I8OA0dq&܂A`0A& `ΚmJ1L0)d  $$h24 0`f  4 !`0aFfkAh0 !#0AB!c 0JA @34 4 ! BA 4A0`ЄCVS#XФ2BRu,j5]*42RA,%J%.j@@Y BB\Rˠ.Max5,–RM!f ]u,(ҔHoBL.R$!]dˬ\k.ffR@JkB HYt,f!e)&,R]$%,PХ$,B˩uB$!*J#B, `fk5W]ud.atY4$! )z 3Ha)&haHKEڮ Da BCc6fŒ@f5fRQ],!,Х0ёjY-ChгV١,ѐnlav6a$0 ATcjՆ]-SD.ѱ MچL06!87775 XۛT5d555dWdC%ئmj v͡ c A,nmdѓ0PEjj62K cfV4ln a1.P$`"_!0 &`0B  L ,HtB0I nCQrŅ #Ha.2D0@ XR,hP)A8t`A$DCA\a.t64D&%!,X! <fϛ,@HLO@*@d覂[DL#U##$K$DUL)"& |eUd!$*FɕS)S.X1,x! Je*$V 6Y*J&TaC$*A4!HD3FC Tá8*.xP2hAFPSC4lHDH#/Xi\kf siV)XJ⫌B$5yUe:o-?w0Eл,gI{z*ܴ(\AT[:L=/=2=f(bHND3HXu  p'{[j^ZfN4{F7_B}|6)W0F,hM|KgW2 ʅdZWʯ cj:bYtZde-ifK 0!X"h|( ٖ7˅ɦ$bVw:497Xoj3n*YQ]ռ$|^DZ'{'W$6AWB@-N{ 0@nD/ 0!dl AI P D M uMfB@Bؤ(X3mp]7i9B:ה[op 2@znX26I"@"DAsFZcdD PYB; ŃCJ6|$,ڴ092(@ h" &ddB@" ѯ0b!H@0*@:'Y X"`͆N4v " 3ICj qF5R,Z6:LteR IEb**id',c2ЧV #"6)F,SNB]0*&,$?`! 4,ҪĂbmȮ8&ZbDapd!%&Sz6txƙ-'b`/[$TL"ʳӹ+اdtYDfmc)4Fi#=s͌_@ܼ}- )sN'j›S(I/͍p h\Cf1f38hNzY)UzΙ.JIHҨ^Tgg8NG!k T+JB$YR` ̢JI$H @HB !($#0`҄%A=ҁD>Q""6 d`!I1+N!]0@ T B<# B  b B,+!p@8@5 BD"@ A G@؀ B!R@`# B!O XV B T@ B@JD B=aYZUdccV )$[_2g7*l2(뒳P@sHL…; ]hAOC@A\#.TZ/Ag2iHC$!C, @D$."ic&PHpAgC,Uh&wFD.܋h)&h tt6SVC2vu:$GD|T dt.kQVJrK~HT|8M(9ل8aMϖxpn +P${,>(%~-wɸ{ox[x:t! &tC ]Y–B߹wi&vre=]w:beԼ }+gm*{"8: ]3"3zf!;5hD! @4!c>])zVcXU`FXV휱s9Ud;cMXO?r@z_74OxcX @,D@"͑ 0 Ȕ@!2l#30@|KB` dD 0d*FP Z0dJs'Gxz"M"" nΦwBM%WVKΐ" h&FWHjA!DϊIg}Hb,_Ȉy@ =rv-""D.NaOާ@BOB`BD, ‚kYd(I0.J0Y`HC~_ĺ{s;5YffmX0;Y. j=[MГ3bUr -'" E=$0"wNlO%ug[+,p''ud",r dnkO>BI*a\5WRXnb`>W\ )S<ܻ>6fۍ$r1 GQ?:k `gc-L#ieӥ.Pq4&%$k橊P@ 2O~eX_1uuwަjU-5(,w#fr5;MBE[bynNޕs:}x]C~e F:[`gơa_#Ǔ`-o2d Wl`Spm+I3z.bES$/@ K\"mRPLȼE0Bp]zݒ֬*DٮEr,Wڗ%Xxc'§Ql|'uHgeBRXBqvbFEAq 2Xէ++ka!菜 $*o`ϟ! U7LJS?}g6pM"dV Ly!u<䮹qYj7I@b??Tvw>l$q㸗] ~9Fvy-[AY^J ֒#MEwz}2SY;(P4)ZcaLs$Гǣj !IVu@,2o&A 5v@ -$ 3feB ``f@d   $8FED0KA 9}c 302!H43!5̄I@0%9,` ,@Q[2"XK$d I!H@2ZRhB &@FIR$ 8 @ XL2\ ŒX(B(jA0fJ0R 0fd&  A!HR>$PFffa `iF$40*@f`330;&dKDa&h2̌` ȫAs((ȈI`@$DDFd@ @?$%^L 2 52$& c"3FA& .`0 A  d)23%zc"QJ$3%Fw)Bf2h4)HJ `(#24 9Id g2DfB e*dH0fEV263R Li2n6l #Ē&DfA f(Ȕo&`f lM\` !Ǥj*e2'D͙d6dL;nL3"R e <, l@&[E1i1h2"c|Z|uTm 7QTWoMUV|,|L'4}Uq.#Aϯ\i1;'C;Ciɴ/twqTML*BrdJw'2sXVoJ+V1Z4L)EC36qV߸Ibї٣G< |np217y| IYRfd 7-*/&1pg?r9Ѯ=R=}nDfN5iѤ ;~]\q 8HLu"3l:S|Vͣt+kJ*}{E?jʹmoSȌ[;n4Z(#T*0z 1ao~?E4> ٣x6bƨ.ԋ0@Š g#E'mfFhJ*FB61$jg01‰m/.܋xnRbkQ a³bE$n^Sl3 , O<&Eodp Ox] yX_QBB@`WY5oqjy OY8pݑ96wʋtڄ:7ߞܨ,FuE"YeBd'*ⴥt2N떪jH( #~+>6-! D]VN֬b$q}oжE`vs ʢJy8Q}`K&I"Mao1Mod -|v:uW1y^t|M!x@8~#&\0S L?@6Ժ*Eo+oa2#XÆWH_xiR<d1d|vcT~bZuDɴ3؇ IG"c;4GM UӔ:˅qFirvw9? q]2>d,8\x`6 '2;[;BTS;%gڃ Ʊkʸ{=,{Ik}pĨ#rk43!I͑Z$l8 P]JIh<58C*5 m  wVr5;c" BlwPNXNKJޑu½MޕXy< Ju1=ВD@(PU -oyL`D&>.OI <)uaL~/)֟pX`hY53mRX6km2`*@.ɌFaj.4~sdDD 7Ӓ&G7^EFYi5-k,p,z|練ouk.X?Н~.^SDE򠻍 ɤP.1('&?NgcדʷBA5w[𶔶̝:rq!#Ȭ~,@>;bLI#1TLi? 4a% s|9[jcQ4ntӯZh|sz7hO2Dv2>n1R:`yy58YtO<;`v)-3=#Fud}`c%0 ÀWlBhL2:Y>₂  we0-n\\4  #"wD>u^V`] DDAS0$b?|eƵYcϕWJ_˴h?lRwZzu݃ ~no6 {WMο;aW@U$m)UTT$mţGԘMo8}'A1[mU{Ӹ?mZ$WLeDf*>G0Tq8Qh".B 姸%/EK7r?XX[9%SfX;C!W}$BdJy|(ԅB$#"Z"-1̵|-7-^6hq >P$\X\UNZYu:Wj 彜>fSRs~}ڳ ٺA{,JG1 yPNs^7 ηz(]DD@K]4핏jJj:]{z 86i!aV*Q?1l©K 4'#g@H [8+Jex f%1v_ ~ f𙏯 UӣG8 QknfñQo}. I%hN+uVRěZ\~௡q/}G@he^cf}f_gbB.Pߥ妗 IUֈ~4zx>aYNҖ%4/5:H*y5ܖ:%@ݻ^,O y;'΃Ȱ!_KFv gaQI;]b,t [oyӑWc#@ݘ١P*4*gL+}Ngޫ%ERljQdE4=+'ٞXi0o1E/FQBhUigENb\ԍŜ" W7$>r>6c]bMi=/by jZ5-+YkZ2W\C#n`]<ȭd%A{|p# ԣVsߌm dWyXzR>)Hu(@$ypI5133PO呛s 恫?pRN2KYv; "#}s)ؖ !I $![CɆɼ$y{ րT(pp1tEBԦ0*p89 `8f*} У~uR~+)ӆJibtC2^bW(ݷS>y=w3eKo9ɝzC|s=\YUNB1K4DGa$n%ų3L؊&u* H_*(cX^æ47e`Dל=68jd+JϒyE׉'F~t.FOpzF+%.(.y^\Фf g'o@x%D# "Ef3 <# Ec. T*5-ix]iÍX-C=hp{] kTQͽa$>E $k*eAKE4iHcG~T}c`~$D}I}/ܿ1<`R_t"mE3vfJTEx^{\hB`><2J\GD}`6LgmF hSXʤĝ('W觘[ =F}%0Ռę0:k$v% ga=) 8蚠R~`t3)g5ْyfI@9iѴ 62u SG+EmΜDe' @3w׷@Ɵa3a^2BF Q.%OQ Pj ..ۙڍz\-]3OCCەw7iLqN,&/@۞sɺkj4V9L!Ɲk(m#I}llfyASusЌub-{_-Re^!pj®%P ܾa<"P/ K{/P;hTjHڝHmG`#rv7GJ|6EJl᫈ B,[n~HY `;K~nXEDMkr$VڝSoLԭFY6AvYsLrhbVtKMcoʔ "Iԭ WOBT/ddPa#A!㌲7H -XVXx%߱  L=&kT5w9@ #,߉ڧf6tzj^#AmպdR5uBơCds x9}&iy\5#A%ocnB,>LrgEqg8S"Ac/:Q8jjCRǛgmk*f&K #:FkxZVfwTxћ ~&lia:ݚR ShȲp-uR$_%]URՀ 6 π[}Oص9vYEqˆO EbF~)k.UweKJR'oS5o"KefS敳sOXN5Oo^Pꤌ 9bsQt"k}_T/eW+]MAp ʝ+7 fT). 4捠gحmWlwCI|#U-$oj~^pN@"q:pP[/75*%1L>'`^R1J5p$r 𭈆3c7Z*d<1|7eILo=z W2K$I|*Laqt[` h[^ڲ ϽҤ/'`5u|x싡2L jݴŽݼ(*ֶN$5MAퟱ+ `Ȳc@aaHId7!E7:\vvN|1Q=jѿv>q:@t>jE{v-xM,PIv8ŶT@Du hyGu gh&vw1VC8 \fό P=Mj+-)Pxڴ$X ˛R;ofrL&"wM1 n;>v/wYtw}ՎJJl1PDV$* pޒ<k?pSq֍'#d:%OT 25SZnWa ;=D e"Nrƻ|c~j ^/ZS?}olel`~cD8 ξB"b.$Ze/!~s|,~u2亸@Ze^mWjyj/|Ak W _pxnV W0ճ=M:~$t/ j__D3/0_cUlwٞa\5/w%bSviK*ZUFȬGUDkFL`.w~ޟQZMr>4RP A˱aQ*u (D8?b>2_فZsQ?r+(uP:~gwgdXj Py zȥcϴ>]5#\`qf`"?;Q9`"Pl֞f&lIqmS³ #Q}QޞIö[KTNJ%faX];Z›.Rv`6n@8F=M+4w9N/dʅ#-+V: 2km\ZA8hnƀk/׹ڡ5Y 3/ǩ";5lt~$uW<@%JG ]n=KbL XETPbrˀ@ Ed ꡢ#^p1\;FG=~LF;H Gwm =CK!01%lyN.BY5&We;16Ln΁̓t72azV.ݣҁk0'AM\ `c*vy2,G}]KRlnS)Rƀ0[6*w@'֚͟zIC t<`ۂ#y '>_SvKK{ĘUz,%} ůƺuN>3jN Z lH'uiN0|O:+k7][T&rDrgHHt qؘ؟"3?( =?;B1ƩqB^t$tB~AJ,~M%Csq 9 - 9t mkJ`2ůFf'R5aa끾 sZH?uYUBjYV:=3ne5P~ӈ Ỉ"zNcNe^?t=ߏտNR'”.MtCBN,]{)WboZdͩ+ny+slWddg8B=\T(aq:H8F7D7\aLD)dnp*q`y&av3M@ש?fڵE~4N.ʔͼDp$X^"z+也-®b+U=!n \d4|Ub #q>8}v=S汕)@X0wW`1pA#ڱϗg 6!J=o`H#""RNX7F$怓f7DtͭYZf/+&yu.o4nq3t+Ϟ8gIȣtQTeQCysYkxGw'IKw>cfSބ+z*Zd]?ƥAt+Ě|uϖ/4VAPQG_VI.Y>Q7Mb#x,L[@ rj5d/< k5QgQWi%|6s){ ?0uW{<_tޓȉ45LՐY]O[s"1#k^jύcJA` &Ŏcӥ'MkLkMJMǿb(Hl?X`OEMOAv #r{(Q6n{+U`GVkz}nI+|Q[KF,gk(kv%9=+TXNOF7gt*n,TҪ#*\b(EL6VJ Ű>d@& | mRD>J{鏂=H[*/usHW]BU\K$bUbZ6? υW^gi= kTʇsdnߚxf7dy]" B&(:4%uߵ|`%mjS7<1rfHgݮn23}qPG6 Zu/iV*$*0K;ҿ$Iŝ /!(N9"SY= bXT>RlA݃\ŘwEvWhRۇ.]^I)C+ʣF$>KʧLr ܸx-/>اo~RYZ:bQq*VPHs!5R% 9s2$樇-*p]k/p>U^ci|Gl*>W4a(|C3b07 ͪ~C#R7c؎=:7޻R{nD].cq-,\}o4\ĬDo*Lj|_|#cSB3!#$q॥8Dx8NV4)iDף>  t(/ΧEa/` w AI"/ ېʍ"|+~ _al,rZCqMZvH{xsX h 5Pc''ՉAD|]ӈBJ*k{8G¯ؤ1jw+9ٹ pkS^_} rU\Dy?IRGh,<\Vu?Gv nףovɼ|O/`͢>l ,.:-OP>ƉuYpR?S[S*‰n~PaĨ)Fh[Љ[bNՁa%m(6va[{gBV7FV(NfU)ЂkDnr`.QmrKuӌj6VDʋW'&&_bmǔn-5?rgJo/yMtu_גD\Ҵڐ Pa,*A;W$".ߔ Zlܹl(K/0$ȏ{鳵ktŊ %)B>3Z0+EeX̙.F(oQ,;d߻qS A}ƒ֖=f +srUOv[{On=I[$4Gbc XG>}~H7z#z0(E/2yMq6z~%/gц773.1Rm1ۈgd;d#2M~Yqxl4N@S3mv\t;ՀLJy@rM@m^H*ݟLi-N˝3%/h߰WWT>K-^x0豽haqS"mO<4{fwVX9CMYp-jvEit :mr'洌@di\iu`a e6/qFH?cZwTT }("XSeOnI=7PX,xQ>^5d`s|6n{_͢ޙ/ZS5Dn^Aё۸ZYڝ%JL|dc{%hLI! ^Sѽu ˧r֎fFV8 B+H.-y4+6L`z5 -Ȕm޿E&6"#?b-'t;# s78!-8GhbQgf1{Yl]z^,W6-i?6 ~IִelJ|9EgIOIϙ\j w̕rQ|ȓ8ZwfJ3(Ev~V ö>mBD& S]O«Cj%+>.p-} #=fŜӴ<ن8-Q, +HM>NrF6Hƅ Y3_IAM/&KQIQf *Hui-Hïqah0{C`j$΁u/Sg-YZkUX M뛎ϤΘh5{/!~x(dɝSũ:Ғ-QыkV`RzaT'qt:ҷFS6oeCtO 4:. t,7\'cۢ2R#S`9$Gp" @ Z=vۢ_m7 }먄|y$f!XWxQ+b +,a.óL8[ma>Vvfq97_ؑSL+8js |CN(ΓiRum`im͗fZ:n-O ĺ5<\.N H+և$uXZ  ZT;71'O݋|Q}nᇝ-XH޷zm{{_:B,%8Zc7v΋){3 2~#8c aOѵk"In-02k0,$ /Chf]%K}aw kƨ=]&PcGlo UosgF4Rᚻ.87FbڼB3b[.<uH U*ܠL48m1PC,kΔp%ylf5%9Z~/qH1TH&(BrHJ@Lb7>PzNeG3vof|D Ӗ_&Wsܸ2l4´H^3!V;U%yo!G]D!I!n+kZ!"(4*al:RG9Dv(z&VHO?s;ZH\&4X/y ԋ_7ȅ1dlֵdmD2)aԷս+0\2bm~ h,ST.JtzD⋑nؓto0XNQW @o"\PL h,Tj;zqxϨpuJf6={ m/~c# 8xF5/Hnf4E@gbE*pVf RYjH'Q۵ IY5*ؑ71҂?>7NbdI8Yw dr~Gٮnd 'y7х/&x5rQާtyӻ,OFUXh-^d I(mI~إ3GM/?s}{Oe^!Sq}/8KK; #&'矦VvO:Rv1B^ZÝ24ĀZԯѩsiZ&Hav2C ,6Eɗ=WHKܺ Hr9 ]HϞ*Ln {*g澬$x la6ot{n{Q7IU4oEVᣊWd e<^F?cg=crzbi IJWbdT9hlPw~Q_< >u=֔HӍi MzݎN}U.= Eetc.z`uRIJR6ɣ[ Ϝ.y`k&0`vĢ[L{6ߩImD_p%7W8Ч O> ?m/_4m{2'+ԬaCAN. % 1s@({]97~XץCU$<˝d֠Xe~[E9 V:^wЗж31/\Drl2PJqJXʷQ.+}ojQjԜ HR[9J&>?+z~ɍDi<{JŌVAiԾhOJm4s65{rx )f}'@1D/`]i0cљP$y=rK2[n$IpsYi[`QLQ n}ݔBؽoYaÂ>þM<.EHlj/ b''.Ҷ˵!.7sNmh$+} Mi蔟kIX\0ǘ4Oa'T+_)ߡɶ&v{[:Ӆu!5戃 ߽{ y`An^yM3>J?aY;R-Wf+'=x-K':{j Wm0`=މ,WS |Q44UΏvcP8˦ߓZ,L'L&gߛFDW7 K,Э➝2Gk_AfDwu9qި s- A?"v2^vmMj>NH^U +k፷.s@K8w/㪠w%|Djs qL'\1\(pw8ѣ?@ᑁ |NN9E_ bx @J5/QxLP=A ѝRµ7B })aLAW)t[\k7\a0}q>GU @뮻[ml\΢<3Q}rB+UgmxW{ZX)YIa,MjB?6"MZ"syb$=1'C]6AH9]c q@:^#oy $]1JSR:تƯnvpxo֯,ףS>U @u2S-fD\(h No3h߂ZXӫ(T9C ~3LDNldELɹ|K!_.W{܌.7jݕ{9=/r]%+ z?ril6m#(-_lOe`ͩMg=g:(g\qm$CWO;)q<7̉e?IMHFpr^ޞ"Ōp|b@V L]֤rY@<73[']r U<e@\i='k4EӗhPGA1DjSxye02^xL.IT6 88Z6 8ӧX|@'yIpJK߮GOjN]/a4Fr}td Ok?*bd` 009ZJׁy|:\RY {G('dCrN9t4}\btpUNi/Ž<kjV\:MwOeӟ*=4_Gpo[]6U WSTc%PIkF1-a\b>/l֣|]IΑ]77sSםSC[h곋OoӁf0b9`}ݛ='Q| 6dzm,}PZ^ XFȑS_$=a1h&+ƊNRr7Oa(Ⱦ +D걉q%.>V #mPnRzAFЧI=]e݅N$ZB~1R'34x2!Aɿg8.b', _?= EAחnŪ>Б0xBk 6,ͣxd,SK%*7ieIcRLY?|TKi_c-d@cǡӰ1{p>pX tsDGz?,b\4@ umG#@ 'rvRu%0kh**%}$A#qX'e*lğ†vIq[xd=aي0D՟ՎMϰ;)f$qڬ |@EXnY}t-S eq~N@1N~h\"/?91[_9 cPܾ,bWL#/`#~7Rg0QW6hPn y(;RX;;U;2+l:BOPNz3fc2kYU nz'Hk!,{ !X3D˴ | 3OĆK:M*50Spz!g#pP̡T>enSgI~ T#QMɦk,4LZ^D+b;=++&oٰ]|}z=G%r~aeF]ݖ롓'mDy[{|& ) jimkh{uuBd"\qL;h.NGFNɳa: [&< gl(>|T@0/;P;-m?$vY/4w4J膬ŏ2P@cGm LV* YmkmKjQ!Kr,+󔯣)K^O(+Z!뵞Z•sPghS,p[_fy({tg)kRNj_J?&B,x\kD5kf-Lc NN{îekv=|~'q4Jl RI#9ћ( ?7z$ta\ ŠQUg>B=a [ѪgMYZfd^}=9:}e[|k6,bW}Fܣq FFqWXhꍍΘ*tz;|y{xJ9ޞS[xr3F48B$8KQtTrcvUxv2qxdM6,J|Ig&'YwFM^P ?1D6UJ:{R?ICn:<|NLiB9#J32R**ۛOuMЋlv:Ƴ(Z~]INfenS̛)m <]o9a,e#%" jwx4<6-t:q lxX[6Åkbkj7ZňT}u^*w~>oΤlSddgu 'U,i 'Ct86V! F7GfϩOvm1'kvULd= $1t>T_N 'Jѵa/of àPG[3PwzGm?y(&w@Pln%~/7bcR.G½o$Dp*]<Ɏ_.\Gw\ÖwW_2vBuŻoǐ+\~.Ȼ@TL"}+!1cQw#ѐB#!9=9"8>~lOf{R;g2懼 :aĵ`6%Gl%αSsjk3W=GVa#.K݁jǀlWr߶Y 92!YGov+GRg'Ai~gbl?_a)Б-n6"xxQrDӷs%V3f/J Z!a uW }RbvMmbAyMH*U88g^5}[hbQDޅJ~O5ۙ_g .FZ^0Sġ{r5"j,fJC,8jS5ȍ"֥2#Sq[\{q5 J=[z[Ww]3 \6 !=ɫncL)fk+?? ;$ve\reXF-K xUjy>Y5/ 4!"pPNϡӞW4uX۶r+~ᅌP @NS:xf6y k"ǖV͖Ӱ8Ex0"eEXB0xeLgn_2? T7IRɊW5}1Wvnl0-: |A TW:sr_Cc16ѩUot$=+ՁQ6Z MuS-8=_ =/w&،CωHi!ڐj 1OZvf oDRh.w>>Y,9B%U$\Rv%hbW*X*q1DETHt*:CVe'҈Lez9rALj6A&щ`1ܔZVebRwOO,#VJ7@@9*@s#7+1+={{> ^7ydXP\DX!C#ujƮzLۉ(ႀo P"Nnbd &ٵE'R=~L-4 l <1/rj<"28bF&eѲ`$$ΏZ;cŦ2#;I^yz2 )F(:Lyڻ WZc,1ͳ uSc$5ix"6XmKUZ JJɅ?SW.v1(ur +MRkZy>N?z;X2>-C⯭oZ?zN;iaʺ-PCvi,>|^W0{ xv}cMe>2+~?r4\ >hM2n]ʬ":Ng0yB([)ϳx+́#K#z9 hi+XU߅T5&&rDw 6IozRy9me轱TPOv@~n -;e&*0e&9d(s`>֭ЄtYv{OEwzTm}3]u0,9<\"haKr&wð!NYն*ֿbW[tgBҡ77K1'.4~? B[U#䤁nk]&95UM֏+/lp͆_!hxG@M񕃽Ț%lX =h&²je21NO&Peef%{JbkN4o03 PMu#y><'ltlV⣴ #*o QؾTiOP#܉g' OY~#ޜZe0}Y;Ϭdwl$ HkJ=ŨTh 52РͩoZ$ hO[bZQU MT&bjZs#9f4OJK+ q˸^GUُdt#f97bc#\[gJCPw\f=~xqw\ hG+I} {iCc# @Vgd8,/E0IZ֤H%<ŋ+vS7Jg\-2 :#3mtDϣ%\r@LNI|U/7` &3|' a(k;v_؋=a 's[6ŷp%Z]q)@[[6g*~} 1(=7}BVi]2LDs> K~ $Ÿؾh% M/^CӃWg1e:6a#O4ូD0JrgȲF8[^u\87KTv~Vc41Nac,}jO bRU OO;\CU0b;Ѧ]9Kޘq$RnaD(XXtBze[{CWmK2@%3rukB ;V_\p#^0 \ )" orp).st yH#t5D.nStFkū0Gsu\l 5Q=wU:g͗˧8p/?~ULQ)ٝRcq}5pX$mjXqp=I5dohZw5&+yۺVZӿ)%FגPujѓd((xo;vdJoocRG/Vm9a4~L2\:0(94dJa 4$I: u6󄤸s8@ۣͦX$5~?E%'u N-00HJ 3kc[" QKw$ (7[֪f qMEE8x&`fZ8ˮm}1w/ui۱kx1(N>38d\tɪ4H{;+)-2r.!ipxU\ InQ=;dZx=P}tn|5! Piѹ9k)!%jizKl _0^,ojYnVcKUF] <3ef?(ևL Y=8[tYq۝KWto|~0_5Yph.|4oMZLMshzJٛ8ڔ  %EFY[ǺJphOs3l6?7}Z594bGFّ]ڈˣQԪWU vh:eB4jc]藜p11]& Aà4KVOrhRX /qWEQcx5s{#d7,Fk֊#EoCErOp 3fc fJ,1O"oY]yC]]`* i#]Gӓ}mi$kOl?"Gspv7fIob|l1)_涍bF G%3D1"%d9̕8(I56bvWJ45_y5LLk/P3+nz, iB׿LJ,f{ Hd;!YsaL?TɠawHDp,+ Va*hwZGʓ= b=~{qFRxYfe 'U\BfKV#WO:X1DmD@6}&STJ+nFeLV`6_Kߚ+ t:Ҥe3t.@P h^4UL1ˁٓ} ҸIf|ՓKLkR%$+ZeHbW; 6"WHcm1#+Ȣ籀894FWt'"VikfQȑl.dcn '; L׎DZ?NJhBs+ a8٦Me xېiĜn#:^nKy!vy:i`tu'&ӏʃ9^gwY]]ϳyJ0hP4e^mt}6I t uýPlBù.'TB};&KIaO獜fPBá?^B'8<5NxVi+jb><ɘW|fТ 9 :Do~OT 80\aX"Lq }Gaon ]#ʒ'ea D۹Q~ndNy&oHr(JV{{d Fh0uaɚIWACѻ3o ƨR$"f&LB+7z.-R~svqĆ9,y5/$:'o Ŕ=ΈCǎ ½\Gvҙ䟘s ;wz"v֗c)*da͢x1a`.'Zx:aF[r;RۦHdB-~J F]^ϾF>™;KHHD~[ ѭ7"\ UjQXy3Ch6~E2uʵ]j}jt2=7)Bl+ha"< %ʊ,w_n<+2vDBxT5[>¡ ۾ ܳz{' zrPsc;e!T$2^ #̬Qܺ?QxA/=%_ Ryl7E o?Zh,[dwu2Ҷdt2M|hNhjbcE;͝M&烨6ON0!ŘEm<׸YJuD}rt1T/Rq'kIy~.m? 7{j1@z+3g9k:2tmJId~p.T|l`Yvu64F<>ޛ)v%1YKۄԨ͜n򬢅ygRJuyDJg2HM=j"j)a=;a#4\.@@UY=c>ׅ12U,Gb7qe8r>y=m,' ޽2}cozZ>a=7g[[iV 9 P X$6ʜ,wvLak}Ld„uTK8Kff׽}@mUھmxp`-Tf+q yRW\TtTdh+BfM/s7ñ1{r-%rFj愤Z`{ׂc&0.܂ 9xhLφ9Nv4"jzsͫȢv`SNs1بqZ#x[/חqk3.nZj2 aY$_#JB,F~@|?Dk_tm.rAzKqGвg6C̃JtgO"C/a7a2.p>va۷(fzhU2('x׊x[eui(tŖAx]+$Q؊2_Ldd@OK@LTkL2[.Ҥf\7,x2>ߣM$ڃ)I~gJ[ThssbuThWwEAN0a?.ZE!m!md>SUGX+=s!,axx-o:@nœͿyko8G'<_~ƙ  +d [ӍXM>` Oeâ=,[\}8HA]:gRSW<@0^%SNثeNg+r?vZ( ܌{PƼH  &._/krWLf5D&7dv&U'ڤC=)Ať F;@G_WuF|oy7RV6L-Y{:˅ u˄ט_W yӟӖIlL$m?- A4UYKi,r9R} hU(Xe_VV_3l|cx8Yu5) S.ۄ3ۏnFlʳ4l]DjU zѷH"Io}h} 2c<0>&Nxe٢XeiqId궡Y=Uß[ge^MSL 3P96Vխ)TGUNmV^OVReD-p?'rn/ȁ츁xM^ Lq|h'$1"rV7!|q6iFd`N~hhFf;:@2yu@2.YsiXuP$c:* _ G^`zx93wpv0h.,?Btۣ+;٨:Xg=&4:#V>lsѧXoBdunUYf}%_NgE^k8G?)|bοH_b9[H`N_|iҹhdlTy'1~4zUV.TOV&V+AӴ=myN%tܕʌTQ vp录7muDH![*mٍ:\Ba00AnW pIo{$K]T(;z*y/Iˉ6 J;rL#âYզؿƸب4qr [n9 $_͸O6GלalWț9JU"9õ /Ifeٽk ogx8ΦLwͮ*@ݹ6k[mKv~cƎ(ӼNxi̗ł}I[13ŜόcCb{ 4Oh_k>[7GgDn`{B+z`KFnHmJ{>NڝBi3=X ?f#?y:6K+*)fmFġ}FcKW-+@+1$7SjUGN.[fLͪ-=(K|WYݯ>HoFr"$dH@ n~s2Qm >@>ԩ2ՠ_a 1Nܘkl&zpBUY‰NwZd:}-5`V wZTW<Ԋ+mWoI&Ӈ]_ɱ8֥Rё6YHw:a7.eu~%[ّXe#Xq$Bv 8((>-PҶtwOzͰz Wnnȁ ׇjb2ӝx@R_@H^tBpnI(ӗ)tOT-~#faz8=+Tg4~ :WNLlRE僔K;k&5<9 fu4>Aj0?Vw #^/YZˑ/OAl'϶ӴMpINg:#F'4L~e:o߫Xdtof.G{mߧPp\:…_M,oC?gR$QB+XRCATdH1%: 6&#e!xne|oUDC2#k˿yi|*+5-jZu:Ο`6|ͥUy)Lmѳ FOLi/1A[=ˌ/m.o:g*f+2N@lTx)&Oo:MK=4]9P0#2:OOoU@:@z1+sC} PHvOp0 m- t$ 3OЄ$2#䇾LN5Ǚ[1,:cta0+8auӲGsyQ$y1pLߐ %, O8u7Uɿhҙ:9Bx+cYGk+,,DLUn.5I.yMvE4ty`/zR07P0L][ f>7YOz;u(k {x"EJߞjBo#}ƀ# 9g?'IcUȢ8ɯwз}a?eBî#0|HuX_J2sK[$Szh3#aKڋ?޺A(ச`0`u=Q\/xyo&2؏n~W.Muf-C9%SSbsӆkRBqeTu9U~gQ1D6+P1`;+'bZ i.e f\jkԍVŽ4**T/gJRG ;oBhNOoˬwk#:]&s:{O:y!ƞF~[F6)k)oz+0|AvopAUN9,8S8MFӋ&J+V-_MkJ)|&6>v yHkIaSiHnJ(>y&ߥygA $W3èk=ERhjOU,c%zio7&RF~V 'ߚ9or!۠l"Dٌfr4#p#T@Om^ҀOh/< KQc3X=CTZM''eokBk0.ڕıΤ [3b+fm`|[3ńo/?p HU抩.j|S_)-VaI4(J]ݴW % 3%4q^]d*xe}C)..@co3`mI@ldž0-T5{*Pǵ=_ D ^ăGľJ76(*Oaaw鋙$Fռ؆u@e Aa2? ɿk,?kW:Z&hbo`Icɶ^31&Tr33zCsYdmXtJ'fIvHE<%~)@<B3> Z& f^󘼤+2)@gJiKAA;Xa$=utJۋ~/Cy? ㄂~=6{ PL1u`K[:y n BG%I:ѪYO3>oCJ/?*p$&(ZAd|^:E_U$jt/%OHp_c]/:V.ѵv*`{WD*u^7! jU&ame~N 0׺ ( 7ZxB25ꂜ$6 8p~Fv;Ū+r|Sv!*YEtD]7^1v0) X y?sq]G}@J S]42^o4DW66!.cgz-}xy_-d[DnVYVqwb+bwڻ͈e<NSTѭYZ7$͚rF:2ܩ\س4T:W÷@ Q ׋ g|!otjO>=u鵕 6T{ }-o:Nc_aFL9Sx%WkoXK zFəxv?TmWIA15K5,G[4qh'C;Un|"w~fOv%f|}%"߯iYgz[tuuL `R Cj5vńЄKaط{U6OE`u&=5M%Z&M~0_"iֆ3[|!1CDU4RU!I; !LH?w䌱"f06_ z}=yb*5BgZ@,OQ}{g/ >@TG Hiֵ9{$С 3 ˬ'0Ƀt[澾LjB̠RIHҠ(pΜrڲ.  'r}9dVCj9)zVOfKc&P#—ZL[0bC_okq/^I#mt[hϼ&/ي7swt8:̮k-;ǯgNջs/ȡmORhtvK5ֈ 9}0;La4}^u7Nrv-qSVMeQ)o=uÅ}dA$؝f/yFYGB6wk~R +2w'V ʥƩrKT[}SunuN0 il.v<h]v;ë%+9%G S%KKxX ̎̎ Gt o<ΌE v}2fe8-2HjF}A>;XkJ %xX[YUg;dKhіGkSnK:D347 $% w8~/~ "q@.dFymQW, b756+fl~u$W/;M>$[:3. P$kua\aYKu ŪG`@'d4_b&MWnT~~KW&nBMtԥw16 2oHNy3q} 8T MP5[P;9,1zCJC_ڀk.ܲ6S`%d0S Qds>+.0horSߓ_6jW1&Ɲd]kR[}!"yO96#$P! .]-K?!$?o`2_Y`!2u d6KhŦIIIdUBnOysSM洹M4JW- 6"a٠Y{53L6{%Bl^iyVXV|GN(EHΠS|gbC{&um͆JXpJ}:9J|s4 uG5]> 14{>ht0v1&黢EE]leQ"IbIcQoߘS>a 4^S/ C3(.VZ =k6Ҏ$cYVMd,JO!E8Mq>Α\|lɬqjI-$wu&[Ȉsraw3)x1h ` aŅ~׷ *O Ó{_v͞W  Q:ҳtPP3n rG6#>.bcbYRpZA.*,ܫ܂Š2,;+ Nx+.5_ C*BWy M'&PyGG(e 1р33i\joy-6D"m"='brQ>9rCC cܖצtTE:7 |CeJ0o0vF*iC2l_ČVY-CA1+oBS~\|:V|`{`݅T*MZ+ҒF6&pRH4ꐋ~ Sŭ_}x4tBɄĉ@{۴&ƓD'|LJuQ69^5 ( Q^p X~q֥J|r2O"B_LZhQ^= 6×kFw'4r=p 3A!<̌KbՏa[a[FH[H7 &v.Ydz.`wm-4c^60ҍ0:&ȣ&=w¢ΣXVoDWRZ,z:k /͘?oÁyYr_,\5O#q:;87kL ycߴs= u `%~J8>GgǛxVݑzUT z$,fv>~moX.&I\拓5=$x飝Q(sJ8({2JFu( YjP؄{/טؚă|_O ?Bޡ]K w֖5ze=xV`i?e,UJiLogoPRx ̝zN_=a>cC:W>|(A<=ۣ"v{p(\SiԜc)!=@H Tn}y3a0GpuRL5Rb5 qu YCtUA.тre/ciSTPZO z.e'Yy9^f9aPP[;cGlyc/Nr ūx?ijA|0e vcY ]j-}BhHHO)֞GՏ[KTi>1+ur:Q&9Ġ[gfNut%bZݣ1?9q0qP\>D }ReFAHR>x1jz$LJ$.N6q a2iS2-Y~!BzuG\QW9lUW[iru!| e"@Q'Hdd+BD?c7ko.,ϑ,d 5iS][a$aiY=Mo=ѫ~f}j~|s-H+H"S|+~Fkq,٪'~|p8ujTE07]B"IP,l0>!6s@MN͛q֕[1Q"[-GmJvtmN{SuSw*^П77ɟ>$(F5[~Tb;"4ذp[~|Ow0ݛ3:\|1v'y14N  3*<TBMp' o* udY3@3nF{OSeg ݹDY#RGLg;t Q [spGh@dR"i=i!OXr?6%é[XEjz\y~aF!R% 2'4k|٫cn@ H䧌SfSzirzdmNcYл&ZTOqLXxߕbsT9$ZR}5NdI~3X򖌸ZjԛO Ue]{I,,hd{+ƍͲ/!\pͮRzEhsS8hXh0~HH?c}"4IҀ- RfoLIqbVMe{|.<ہSDQ|&)ʫAs6t E>k:5OlwrVe#ְO%ZF%p@E7dq/–clXftc44A6% yaJ=DFB"w`_ٺro]WI׺Ж;[K 13tjE44mFzZ}MZ1QGk*oX-_PׯG2MV_EK1 'l) ey?^ۄd+VZ҈ թ^$,"}$ ;智-ͨc7܁ПW1*ٯd.P?]i36hE0i(\P(cok9lLI6(v7;R(NWUbe]i7dc$v^:(K  6ͥ0e0EdPF|\&Xj54o`D ROX1鹅r+]kjOe472~>7xK}}?b_+ӏCn6}1xy gZ=j•[۟7)ށW[oA m]~lCڜsu0'|מ 0T+WZg1ZF< s#55ٜ!уˀ*7372eܿW(E#'8Rl,qF{` " L *P|X=QhGqDN%әhackqVTa fi(_+ uƋ=дP8P89[I&nbbGhV4OG뮩5vRTS}O._gya;%f.$ cMxWck5=~%?~MɗjKm΍4mƝ#->N=ɂmH]\q+1"" *$I|M|#/GgbGB O|f#SZH'}N^5\QYXng`3IVр.LLg.GO_OYca.,x B|N*3:-4>qW4GvݔӉx?aL7 M_1ETF^-kXPa{rQ\@/ L{ 7MS҈嘟HmKlBfNK[#_|fP)HŘ1b{&̊&Ex!v2>Zۍsj-:Ǖq#*{NԴӅa NyQiϢfT8_l*?'3=M%ސo@?ToVU@şv%L{!#%b4~)s͍lAP۟J}S@4 u΀,Bxݓng MiJj#2Z?֢dq "EۃS<;ޢɉ Bv{Z3$Iz }zӬ[V[׳m"Iw2qAܦ[BY\TqjPx]xp'gXvt夫ƠR 5DMUYo%TOB9]ews0i.|VƬo婯eDŽWlk{4xXT5r.ESB *8 !47X$T+)j@V3`ӛoa3a"e&1RWB k_=ԖGi漹vY׬(ep"oR,Koi6J {Y?M>`]>H̲2.s^]nVUDnt|ހ)~%tM6mN2[wFʕaOJÝ28>мд (^`6F Ol}}ʟ+` !5Ӑr8'n#cK6yB>unu|AjkI>L#dt)NwF)9H4u˾nRm^睼Z |ԖEp4:R,m~ UhSuV151tKl{'}2ѩn5!GA-ovFPJ2lNxHI@E;)]p&HAckPy~ZKy=WMMT/0C`2!"=@p$ )4gMmRNj\RLyUri2PQ'>ev*>fPx;JO%$P0SԒE<)bRCm*\Ym Ŕo{gTGɲv^?Оs_zǹm2d{C>0blV4z*/`<`  ye}(y=nE ܛFLQzW0|0̢'PW#Q_c@8d w g34gr+~.#4$H @c^b J{5[Zq3R(^uc\P/Lax*\7 9fne@;3h)6qV}#3. VX=R"{ ~D9hFۃuW0e+&s8!oJ=lFӜ鶸;Ȯ< ^.̓oIrg\uw (z,㯳'UDTm)ҊQ)+.KV OߔM'Rϡ%=OR\4ɚf9Zjk6z)LxiօKX>4cI]6aPTuZAG9c*'D=@ӀaiHH.jz;%XRw_FZpU-B}Éq;̒?jPÁ?oO> 6/. -J܋ n䋟/᪜4;}ݥ".1ȡ,S^(i,CT&ƚt1! ЉwiО ]3[lh S܇M^k N5fz77w 1 tnH:Z5Ʉ) m\pi0̈́ns@8 .l #Gt\L$.Dy^r%nF+SV:(ZY.u D.?|ܵ6X|O9(F%_?fզȨ BL, 8X,G{ҿ%]nnhóENFj#I 'h 3+R)}NY oEHL UB EŢee'ҝp1Xrz21]nx%p6s=R'r"v4̿zG_OR 1KXY[1PP^F3v~nb2e<}F f7L1^*/bUrsg_zpWG1d_DJ4/]vVU/d.f' #X@re5sO/\)vZa_v}7%XR(;u/@/T&#u ȼ!ʪ ţRܷ3t@;ݚU-tn7q(1Q+*$?-dzᯑJߚw<m͉X3q$Q)qK{MMѲʘ/ CK ZhHq=#0[o2 MrsbL5{gƿDLqbow&Kavb j,Kՠy˪v&W|5@en)6 iiԼ?A'lwOY3_کUi*MOHߜOjეdCcy}RY('x +JUm4udMG?VH#??ivg<-!oaj-`w}?QBcɤFa Rz~9G]/sMIDt[0#!TB*i Ne{ɾS)ŗlHX l -33ly@[Z0z'2޿GOЖaF&QiMCL#8*Yձ=R^]}rTSmwAUjtY6?UXHd.Lqkcl-A0FQu76.K*ͫqdGk|8&SxJ;9wnLL}WFNHHWNp,ybZ?^.G*2fkڥZE4Ki~ҁ+.A 7! ЫU8KATײHp \!_]`Uvm{)]=_Ix`]~e47Β2|ﴩl@>ω|q&?v6zOh}{֢|Iݡ!2n|6e7$4}iQrrџشD.¿ެC0=݌,3>λ #k_v(h3'(?J5Tu$ʭF[2iG'Ee'aʙ,'̾0hdnT4k dAɾUՁQ߰+n~@ܣ%8n2ŴZM 񹞒[ϱiZbw)P L.+Ⴤ<ªU ږ!tK_dC&I]Ken:ip10xs7TR_4)rkwSXEْXΣcd:fы:**iˆPrP%^DSWX~4QSVLtnMQ7!q~f3P0OS5^Wok\G K9Zh_w|TL}2Fd{@㣿O{>PYIs%[uPm'3ZB+u?>ƒ;Ć-cgZ#{Jϒ:/HCx:]4<['wװ@<&sNU9[ Id^iV\RzB'c؆v9OK: dGhOk?Ґh a!6 ;B@0376R6WU!\Ug+F3yPr uBqg8x^U^K#*wzZt+o(Ձ$㺌aS?Ț$s{ ,Ka׭s gLfQk'zXszzf(>A/0j)l<P㪴gOï+'~̒K57ɊѳN ?q%Gu ~MpPGg#5ݗJ7S:5UfwA!QnrL??,ԝ*k:fR+,M-pKIa@#[o<4Y)OA%[p6Nrb=z -ȖiüA36;(n*ܞ[=Q>=Dwɓ/|GRN'QSvY7,ߡ3,M}8וeh՜nV܂DOδh} Dt$ d o&o!괦CaYc4@*IJ++vq]3jE=*r Bơ>FÑsV>4E,-/c>xnH{;g?V97/cuV9Lg?KC wwݥMާk߻엡6XJUOgѝk1 &ב٘\MibTUҲzF1Hv\H 7>IdYEDzY,phKw;0@$h:$˝ p b`\ueB c8_A. qhV~][)QML+٬d1Gip_pr&ivGJ7;V{#B{1UDXb_}W3o|Z' c;%\HRg?i2xx^"4ʄ0H^Ҕ_!2jPxĒQڅ<$j@VSPY\j&XR37X&9圽mmAKmxI4N+b|+[7:iXŎhLSVw l[#Ҕ `[$Xd0O.Mq_kzAa"guTW}G H?.Q]<)@Qs,L].ynUL';TZK"5 "e4@sUlgZX^@fmYaetxh {.ɱ@hB\D!$F[k)g,;SRApO_jOA/_,Ux(:Z4bbBXN04dC,v4_9ty^ڡQ[b?AN*S*ls(z _)k_BZ^2`Q_ tQ>hgih\QE˒Z1E|)zZHF1Y{0r%N-*y8z\5VD:(?Y݉ Pp8qSYuj@Sg$KPT>NlV߅ِQj窦dvICޡHwxRT<%@i No&E/{&vLI((228_X]'ox/Hqžtv 'LПW+|oi(\NZ/>?K+wW\9i8 R?nZYnF'-uԭPO5ȧ$ʇc]8]| 3o)0 %6aM,Y"x?.j+k1e(fEOP6BGv1jݿ-}'h6`ݴ1x5:WhX(ksBh edOER ofGӸ5 Z¡*XܜCQHKu|)4 [ pFMT׽a~;3GEjT ŧQ+97hy KJ#Ɣ䨮 ׅі]׀[wJW6EFij-.k>Th}^4r3Bs7]r>8k}UA6S"dtҁ/*n!r"dtGy]nS#z]w(" hU]c#V21ٳhķU[sNz@j},MJ*KD*_wŗzhC퓔%GUCj`_|T9>yz2l::Ոvk )mؐ꽽4F^Qz>>fB;b3&0M&>64p 'ofutLuiC}k0piE)5 87vDدP Ar:5[/m;Lg#=;JU5((Ɠ `b+`}/t})H;4&-5%0u TjM ucO oh^燊kVG{,t;ݡ"&yL Gk*Ԩ' T_8 2a/e~96FFg1FvF.JS4n^;oSE0>fk:#&28~2aXר,vX$DS]n"=J3ox̸:#My;Sمnz%L"4Mp~6,bAsV1n%7M0h:2b~ 'q*Dm Zo2&lXnhP1ՆŽ?lkq5XtqF0* siUnɷ= ZASO.S? jD܏_i:ڜ"?66WNg"dFBN7u; aiAӰ8 ^~kVmmI_uLa423!'%w:~ZXrbQo~qOKP(Xvn"3U+k|=I?֦,b6& {9x<5V v%$t)m{|>;"(;TJ:'+xڞ(Ο-]7K$?S,*آ$?KKENYfvBտ*s\IMmUaWWMRfϋJM)$s[.jC)q=BNcU6S(sR?Hk Mqx\{0 h |&h k^!=$ V19~k Ok:_9$p&G $6 h Qzq[:`=nw@ۧq10? FpsRRF.>YrE CϿp;F/wv&pN,*7}As Zx9'{F>aZxsN0PROSYT!9`ECPeۍzSN-[kfX_n~-Cq&?D\"&F}8KWP6WekLSڳ/yu %ŐB083ȠUR1]]!=W?\axaM"M>܈kej8x5oD pYA`tS(sJwB?0rÒoj}]jd48\ecD'>eOq۲2ˇ7;Z!335gBva08'Ua]6@l4Xg #t5Dv6W dZx(EM7EM|G`3T!8' i9UAs=(]t TϠ̡6J٫,k_>ǰĔzzJSq^,A̟w}E x=QIZVwW|'㙚j?#A󏝽 mMp6"q66Ng28u)˳tNP;7e@'J@ | MG1_/oe>ǍBLSi,_L07֑jjÂUL%m3[Ō&rޣ׾Lta/?nۼp6*?j%\;LL~O,Fͮ8mHc|q嗾0-طokȸRʭ*Ny }o8P+[-;\7\:p1k\Ji 4-CD۾_U&~,9~e#hJ rtjl^ҢKZrh u$&n8b~44xS.o3ڠw`3r7~Y 2뎑blO+T[b+Osq&;f㋳ L3 %1/-;HY|^Ѕ&@\q ]ߒexW?I eӲH~mn0bVׂJ= ̜,^j 8}Y&:l{O߿CgcdsdFzpPpd;n]\<;ݛA3;۴3JпJ(o@eԢP/a|O|EB?Õ%MW+{TP[voeäYj\ۑA~r~Q]HƯVv>.~\z`5gVίR K>qOu<\Z4lKbn\o IIK9r4Қ D{ߙ|XL>f1RUlIiDt춥I=0s %E$YG@QXy`̈́ya3xn5Sh(yN(ǷH}:n4{6gh:rcsLs;l0e[gyנZ(|B`Om9HN8In/ nXS$R?|%§I_=4agRtI~lݱtj73Vbq ':}.Dn\AƊkq4Cڮݘ{P^X*Ê@;8s4D=+*ݦ,sY\L Xgʤ{ We/ n%=*§9*Vp8ݫMlMQ9 K(-Ho0JQ_E]æLa-S(,d^n^3""`PgC8Q إeJnJ @H ͛1$c m 6PI>WN&57YPV=-DEB{5VKʩcKm,HG<-cPsۦq&~IkѳqyXP-$jGm gE pT x],ր̎$"Q:}aWֲ.V4hY:2Fp:U63gjU!-U#*tYH>$E+ޮ /b~efB.% )#FḿG[>J򾃆`0DdxK{i9HW1(#M~%2Xe6:\?Utg:CUi1fgUp]ey˯ݫǖ?"NEɂf8~r.Lb8̭1S*#g%R"?8U[ot%}qc~QUV%(|UMd_{ǞTtlktn=jK$. (~֛<u, z״,wZjȦ˫WF$&WA5Vi>#DL >QYl$ ^jv4L pBVC^#~W=ƒ{t!S}`7Q:3qeN( Il)h#8ŞlVVI>JZh?<[(XWCrh'ʌ slvR y_> E?\$z)<,_^ gk~j xuH i HF[!>N7SWf7m3Iu`psJ\~pfȯ _O -0I$M.;$Y{:Bnl /Xr(_n{ZQ@M<[<Еm&>EI#+ڿJΒXH#2m*|`sc!s~MV"eO[ =fed~CXZu?óA;2}ܑn\(UISO v#$̪~"%DJ D8_;0ʖ{w}#o1{RE Ql-s U/ny8Kφ~X瞭{ѫB B~$YUbD w#YÎB.`G-\8(zSᎱVoouv['8xIg G`x[FR?.vv?eݶ_w5SևUb=S5?TMY Q?ĥ/K~xƒV'sPezJIIwG).7gC=ָ6652A̿O[t>pD{j3QDfX4#ǃw:arcn ;>\)KX~|Oja|z姖lL c@B UUsn{ ˟.X BUp#ԝ`Pd{ B4eW| 0;eJFal_+$2g}X*[a:_"cLty6^K!#$g6I끍nÒV)$AeFZ(I+QȜթ4to,Y=fIGGZR\W;`D9צ $xlD:EC ~f8$o2^wvn5B Y]ʇ6FȉfIdVK}e&K>^HiEyfl wIww4R/K}ӵFc?iMߍtYG[L(Qgx=hvz~({\;.43ᛘ}P,+E*z[EncNu yK3-~!38JEgVzѓNjCDL[e_l0ת-Eޢ k3:.{oX@1> tk߄RR~f47$2{CȈEKV̛T|l}޼زaa߶E#0)zwSJR4"<dS9"Xp,pѕG}y !$+zX[ %{OibF+3gSMDW`Tae62P_ڟWO{f2AiBs"y4"4 ީ}Ʌ:{6 HF1?2}Vq+q/*Q9[-K8jSLH7zuel7z׍9L5(|1QĠy3^<קD"nع̄'AQ7F{FrAIboR r7@S-Ed ;7Y9'*Nw2_ʥb/;Ke$X Z-v^H#N'4S `͹Ľ&{R؝,*ҵޏvw?^9c8rūoY-컔68ur\ءEReKBp o'#mupAj!칣nrBNJeET*]c.sBKB(NQ78WT{c#@UqtշvSwh}OSv̍/{-gw;[D%hCC#s>x`No?p" 1(NJbӲLU]ˁi}9/(ߎKY)jH,k0=i;L]T$-Ѳ=9o5d`eB:*~[B@£msxZEE YbhǮ53g Rtc\n\*lnkVXVx6ia$E9 boe1V7 fTe}`:`77ftڸWR|<.LnXbz 15יk S Q;ea{vuj)Z!<}lNźNo3}=Dc&)lft3(2aIq 1>w綍<{B5EZ4!@Rzsv%0%M* l<;+\iaC%Um"ᦕf4"p$ _rOU_CQ*5G{rTNrlTuS?`α/XW 5@:b[tzP36?N MM )!9l67qiyDx[3ʗڿJ.$ }9~ a= V2އZombb5_ekx`uw$5N4ΒaG(f!e. 2[KzxjHATIjEl\Jߑ9|?Y0;`~Hͼtŝg(j?2+< J?ܺ >#A+ ͉dMr=Tᤂ9E Qfr>dp}g;t[B/Ėc QN7n0a 5 ?CDЇ e8/_ҀlyPLThxNLTBş4VGv螳J#N[go"l٪c):Wc!]c$uI7grE; 1,~n0XtJ%`ƏßGQL"z.tdL> {ʧl EB^Wu+tܴ!Ɵb,NA9uO۝u*d:|S*2 SA#3(SN \z@ZWZq%qn`Ȱ0T8FcgV^$\q^R:8+ i" K$n_&L,JGX] 5 &&!t2 mtG[m*/kS@i5%k3]ޑJuu`;#$[\?57{czr"KWg <t+ܿws4U09%<2OF/CA,NRO; -,ǑtWeXqf>˹3-wE:C)#lp*)H! b9\wЙÞexQFx_+s *@xٺTɈDj20Z<]LOhņ(.K,'VG?oR>L>yK:`D1'|- ~@PșC-'1'57n6`LA[@u"RnUV]#dz SV^g!=z:kvN?No="WQl B NTFF[" ޖ@f"3VAKx{@v\˨VPYI0r#!ЧeaA,L9JHY009mDeA݀Tݓ`TPtΒ:.)6Р\eTCI dOvWDލW~ʜG߭xyQ-)NGʀJ~wѥ}'}8#*dmÓ[6B}w:G0ZwU9Jc$WvmIrޛk*j"ʠT:V@pĝNO&c[- =h=k+'#MB"m ooʣp,,Nl<҇Cw[g"- 3}"K4q笷w@O90.%<<߸~g,vft4%r;H((֟]8[6UMC 'QY54D[Ԟt MC*b0OiZ.Lgu]۵kzv2la#949[G`Т5/9rY$笡SH?9H??q!YF~*,oM]Td8ޖbTJC<3˹4,ۙԻF*`^xQWgM%hȀ` Lt!CHW^\z bS0xƄ6-%>-+D>2^6m6%2ydOCEYUk:~u uo]AQPG1\ֶźxFx$(}3h~wa[~jOW}y(iE}Uj#A@v6D䧟3w\KۈN_iR?Pﯜh4Y`}ڕYx,hƜc+.c8}5,g)jͲp * m9ޅ^ G"8?SY0 ڧ% ֙/'xtaBqp< mGr\}grF=I+GQY̔sNUmO]iDHg @V%܂yעc0wc?m;Ei80qO6#DP ɶ'X.g 껄X6!Jy5{Qr07Qu= =|5N\?9H]okcZ}OV􌇐H,| E'b0h6~@zٗ~n37d=)|Kj1VT Y8OlI7~VVQWJ!(4e܎ZCyB=1եÀsN f6; [XTb$];0}jq}5MȮHkqZc߹ ^Km_(H1Y6YsH]Rܵa\Eजͱ!'Bת؞_6fMTzD{ `Gthnwb]<^Iv*Aw ̹%VaSx,.>?%WpH^J]z#YR;SRm!^jxt $et<#wMGѬ#yF :w\%I)"7jZo9.5du-21}upJLN'%巐wTcMb {AwВ+u5;M灕FDy>+ǁO^q>3"Fe+c]Mmܩt|r8Nletw3=7ZWƚ#@$X9JR?Y!5~=섦n?;r:/:AnzjNڜ6`GrȞXzwke&;>wih#ӥ.jty.kS"ڞ5* " S{P0>c'j]?a$~Z%9]NpG[1Tm*Kj.[k'lN !Bj}~쏌@+ _H@+)8` dj԰A9%3iCT UzB@mgSpk5VDI{Zƪ7/| Etx7iMkIX́f*tJx)R!T,\*qlE,DR.:>\dkm~C3ou\!׭cm*Vx"I?~^MX@MJzw.@m[ws ZNWoGpʥl*Y5f<>iwVB ~t߬ oƂ/-251-QwR*_~QAlJ).Z]ݾ}N )';fYzx2I"Ƞ/o_6.S#a/;QQ/.)5Iy' ک>jHxӞg=<Ď߳9W<ӥt6!vh(=u]_%)cbtIQ,[vyGd}#z Ȫ#A&m ߲O : NHK-άc]}eB/9r^q7Ft(㹰GѪy8s))CΧ}rnO@Z/ScWfY{^ p uaEF^aH 3>j"LX|?ٕ_mҎl bKZ@7`G6nbjϘCuu<8JJ2`a,=2gN.(/ej7|@?I{?iEgvGHǓK+v2bPZ([/W^uJ*}tFx1)C /acRҍId '9 {C.vB,23:eT}Rv7{(l`IS"DZt:^-C?bYX@ IS.{V۝:\T j4UJS{}0\Ŀ=PoavApk$k˰QFk~Ie~yM_sřI&:=*bj=. ƒϘ 0*jN v}8'v:ݙmߐEcFR$..:'^Z !+pnqgϳLt:\{Ϣ?V|É Ʒ^[ߐH-{װ4 e`6c;\2"p#dŽwF۵)^b0EĞ٠]/K beg&6]X^h>FZL'^/0"˭T+:ऎm*Yif&wUv07+ |e)[Iɾo "ٮu$S9݆-ACq謴l࡟< =ʗe kO9d+$hRomo_wc) eY gQ9ezà"z Mp A9s.7uבA,?nt4LwnNlʫç}}stl4H7gVM0d уϨ4a*,ufK}72,q礀?+WMwMxq!{"S.~H9? 1E=_CWpA =k'{6@Sd{HғC.qdr6BdڮOnw.#k+(AjTqբ>1r d6=RaEo#&;,D`+:#^K/;QxmgfJn7ٌGW=agl3Yjޱu!R-9,#3*z;F>>ct)u:WϿPUCM{*pwjۦR@ 6W٣ =?[Gy]Kvm˦rcܟL}i>.㢆]w<ש(g݀~ v28@9o@)2ûх˅#K?*q6Mҧݕqy;wuJ2*2Eb cWJXg7.F!b/^#}ך,vz|eS,5Nfَ1߫Y<ڡ. GQ~*΅ ?Z^`9Hʮl(<> i .y7qB'c ̢Dc'1ֱ'[_ds[?i /^بyNWv$ͅ_rlr^QIWYB)FcE?\*U~!kd4G;⊷>i-[g{97W|*U9TIÓ%bմ,pAYE-,F7Lnm2L\4hJ-~3nb<޹L[y7ʜJePW+HCH{ZOZ HЧ`s×Stwl=`O9_\.nր8Lb%@Ip"(d2bcuTdJI:G3 RH){K^ŏ9${93eR,?W*%Eetu7iܭArZ9"^VuanŖRwz/=^~U6&Y&*8Rv")ga5^|GTIuWq |G귿hٞFEMa}蝮od}{|8szVE^B4YSOmFu"捸Cc qqPhIَ_kQ'W~57&O1g LeӌƵBZ7;_u;ڔmViP].f ׶)lR2v)Wo|G_J.F4`p`HaZu9O')MEgQU7+_>FV󺰋PRjlrzԶq$<"`RďHxJmk||LC/f̧w뷡|6&. u?mKA"m,cSrn":g|Ҽiμ(T,`!ljZ{}^E dVXo>۱ٟ-O,DweH<5!fXTH%zc:W7sW3oWP_蒪h,5vTɶY.w?(2'_ZIOc]bß% ϖZ,! Q)v=%?t\nHك,>̥?AbxZ &ǎT1Y;{t'˿GZC#"N{PBL~Y7`B{[?uT1`s͕De.͓CXWGUڛlƏ_B 88Vgsڽ6M3I k?̜x?}u>yeCL~~%6Ef,lM[ޢwR\-:}/8yv4*Ith1w,o-Ntq2%k*Cx1q 7{L10/$1s nm.C97&^A*l֔5iV]K>U<8ȗC|u9Coc9cf8$U2tx u':2YBh%Yyݰ='뉶ִgxx2R{J8ap]iPXM@;{|TG6VvmT iOMa6lfLڟF̡Ɣ4]_~!RJ%Cg@+.ߴovKQN';39Ua;yfѽxe٘0K,L>{V9?''5} |a[؞E9r]|&MO^NMW% ;~IjN?O R6Zreh> |۴"8w! /?KKd o'=>A'fB*2Gn0gY#ep8xM>!r#ܭA@_&B3͹΁lUƼLTAKjVbwHC_$n>۶a1voϛiT*tDn_^U& v$:/2Ggr[v,QG n` _(]6 ljЦ>`%6/$ɽ{$jB|nCa޵ހEb;I_"O4%ۤ[oCAգH@sy%4JRz9\M0e $ѸMa{'Qc5d/JӽuV~_ P+~NH{ԬYR/ZBlAVb>2j?](9|FFbq7 3}|d3%&^EF0\:ߤ]׭U")JGȦ]*W6{#~kդ[X^Xp&r1 czYDj2aKkx+/;-luxR{O>aغ{azhˡ3u f1akl+Ѐk,Nj'q,;Fg2yWb _'ΈP{n/=Hgwfmy/SX7>54ʚˁ{&Kw*dyiȃ佯ݹb,QZK۸A/0yknuU'U3Nt}iN?H IF[U*]rHx?Gg&Ch(=)~y?EY>,}JMR*=<;I5S%tJL~!sFz= y H _2zg^hC`@%e0XֵZEe_0k=V=HX΍*(e)=O~z\7-=m?o$TcBXxoz7$ፈh'U9E%vs˛E$khj5Ξ)eo{lc-?DΨةV dAbuk̷OPtߐvGl֒C68ϭC332xwfC,qQ:?+F/f|ts˦3JI;y}5µ_;zW&iKd95@4دBNӟSJVpҼ[>I [U>mC5n"vEKXuQuA;kB[yL -O_tGużO]W=,_xY|]h+C:\~ |NeSxl]QsAg)<%("ɼnSv7υHS="TMX@M}X aYpA=zcv]\]L@WbFgI!|<\-xLዮ͜fc3Z`H&eS-=B6%'=o7'C]\f* 8%IU&1=tݤFU=*ҪZc!];mtZ;APEQWڟ-"' sLTw$o;u9.'0!ػDãiycJmw|}0ADb-sT1Cy\_FbZgџ4yw7Yoo"?WӢJ| '@ Dޔp9/B[MpAC@˨\hUdƽ*m uK[מkq.~‰:OV,KW=5Y=Ke}OVil~UZ Ny]eEn_.#G`ϛ&YHh>WhkRd(/swP+Z@jX!|NĖq E#-סɚv_/1Hʑ6V=)? b*5}SH2$Qm=hN qo:⭦o1E?c]뢙rq+$>2vo{-C;>{2 Ɂw4x&8|6R}B tOUץQ}HQ+Z#b)vj)NV/jo-,].X>fИ֥%G>k FDv1zd#]KAXkhY?{P uyԞ%3?#Lgkvwm)<[8L !=~C[`<Н(s}(IBݴyBmX*FQT }2LwjNNE2Z^bLJ4zRC}kGl]>oy3[J^WK& 91 5mu4}/2_>J3'gȓ?2H I'"; 6KuJRM5:Z7.~%oB+>Y;9~R5hQ4ϾMWs:2~n Umq#WE#}􆲉<$='>& ﵌kciZfʥr)Y@⭡Hj(xvwmxA+5?[# BY;TpT:^Aű!`!U^D9 V㾡)w~oxۑyúOV>ΐj>-}ン 3Razmr~Q-\Zb[gfeDB/ŜSJ9/WEs*ScXn䖉DhpƏFG/򕞆Qu]N*n{Kw2]m./D FcIHm /ФdךLT(p =]WI+jlhsSy^ p#by51š::(r?o*/?--Ddp?`)Z}uL!8z'wgPofʶmc}̸dzqspl ~ܱ̗(DV~-xVʾ*}m&x~޷^rw-`w`S6 Iux*M qiT%|o{/v:QsM: ~{&$nܗ8SVdPn"Cx7-+/?-:>(D\ǥ2Kf͗^ʫ&rR{y^i(66n>XkXJ|/L)iW=xWTMl7ܴNX)vdף^r?r`_ߐIp%?" :-( 7tߏ\ʴ˩as-oZ/©D]zCq<@sM۩ G\1uƒА;d:]DFjq¦'SCe4,O"Ǯ h:ˬ[Lg ] te+\OZdX,:N&"#E~ת~47읿~%2Qs y1j?xI6y/{Cj=`E\Zl_0{Iv:qk2̿TSe$ΗciqnGǓޛg7~"\)B2xl,Oj33>0'm>=Ol2nc&|~@B-Bq)k=_q V_qӄk ֥iV!Dx2t:wi<1y~9(; hF n8XM6΂T275 ,F)>"(1bKYg n y6[7^HL[kK'ctIqN "kIhnudeqtGnw,_[!e#oz=0麖MKjhBJJC4eؙébԢf"\gɈ1/OZѺOcy'T՚&/bT"E@vfwXA.G}IpōI&Cbr,Q-?kQ o6I'0χ}ݹtjeySj@[k(}r%Mlker#]/SNĤ&^mc/ui$ SwVrBf )j4Y'z?WJW!J\1a>6 9^v{CDz+n2{5;*4*W1 6 tTԚ8w"q˒R EJ~JD(`dؤw@=*6I,I&z^phP,CB9.[-,,;]#'|~ ^ۮnLT+su8I"wuoJVq6 ?[=tQ&Y̧6!)Nq>0Mf׻_XPiVNGXS:ncSʭ9Dӌ/;dD{bڔ?KcK7t R0VtK(/7dN3*JJ_8{º;Fx\T/"+:m]"Im !(e?s@eg+8$eF\@>~ɞ3F HJx8YqmvmٵƄS߭)uWrsQ[q`qO%)3-̼=G>WBRޤ.}ZkM)/M1f1J#4gK>Hc9+ /7T)|.9-|s6;(/w>4!{Qw3gsڟI=H4J@;wI3~@`& FYۥAA̲AbZ'/[Pol֖vWj&x!Mr )Ðq/{9D3)NI~+XY>C|HR <w荌*j36ٞIӔ֏})iJ;®0LsP Eujlj K$T&b"7eٞQ<ٌ:ѩGUTgbώ pѠ(NY);՞#0&R Cyp]%uʏl5)c]\J9=yT-ݼ31+է6:4$y{UJX7o߻uE7DN͝`[]J :t~9t=B\A~wʚ_im0< }u8-8YIlMS%ώ'Iw;l9= rJH}T2G RF' Ӄ Ӓ+ms_[`cn:bmNUHwi7: ؼ1%.roxsHHriS\1)dN?,˛K?gTrU b?(;G f?#$[n F+;K}Q]`[MUf>K+ptNS벰~~FՀ"?Sږ_{Fw^gM v<4^YrtG&wNrGpe-$L\-Sĝ#+{Pqm)EwN6[%I{M6xM(KcVtc/_f*OC ivKÜK7}ˮh(#Ӎ^yiV3dD\et2)Щ ',т c|ۄb~5PN6![ <,H&VyY$.| \vVjjẓ7&V|^7dW[+يwD ]f n+Ip>sO~ t,e/qf/uJU,g~RgWkm9dz>F{EzYV)3~d|WG}vK{>ΙH.O,a[M;# i/G('$\ct_L|o-mgٜDhRFԹDJ^WGSZpW#E+u9~l;Jd Hۤ ]V"n9.>$t8̛^ԧkhq2Lv]P)a˅-7!Mt^LB=ؽ bZ8v21J}}_8UKϐVvϕ^*cM~Q(U[sq#Eхv:Z9ɇ{ԄM꥖l32㑴߁"EXwJ \<7}\<]Qɉ=cU ,TyuUN4EW_IϐO.5s+ǭAh\*\ &uKykHZZXDyv25MH \곷 F;S˓3bZXO)Yjv,T7~X9TdfoE]s!;znt}\v(mAUa{mN:=zK9}1cL m4*m~X!i2o3d#YP}tΟvZ8dhuyQ<#y7 AXL1xL[, >2F a%?fyxp:NAaȥY!.hsFq1/Yɳo٫z[]ƀг$nq#pX^Ϸ JB-7*Fn~'ӼEO~Ӟh)*W4d_04 Ż,ΨlsxoIeL" .Ʋ4^>OKŸms' j+J_S' ̺EWxD.c\m8#mR{efh󞮚R nw4fHsa tVW e]dd;A=g&1tdvnKCt*dQ2>^j?o ~ŘוFQg=[G7qy& 'Km_z8'D: pp x*/N413̖k?61?쥅"D\ Ņ%6G9dܮv(jSVZ!sVtּk]GĪ[1 $aSyezo"x7?wkntlɌ¢{ O%WCŸg$iGue=Li9)A#yؑ_Sd˘fxbTqE/R!, Cg,'0(!-7n|fxz00~^Qf|lN$ â}?y7L‡DKVAteaг-l)i=?"UyFRt6+9U3ML-%+:H>yV<@]s}ʪb)߱D7:rNḺu}NW˶NU$0 D1'r.ݻG )Ft|pסx/FB~c]9 DS%Wգʯ$cS!kx\ bX[{MB~zx|lqtV~lDD<[PKE,fSu|-L*gBRqχ-Jv,c<9q)Gu&x܁}0|'Cm M?G;ӳ;beT=H̼ЈWP112^n<@q͖BM${Y 4I:txM(t…6W}ױ<DLtj~$Cd6+GynuPI,wY1-"4 R 5fr?b ~I 6i@OY2GK<_&3?KWDq+\,u`w>Vf=(neqi|^{բ+]o-po4Cuz{ޞ]77?KW ]0{Xl=2y2Sy}OkEOSݿ$} 9nɕv-aA>mC9KT_7/[ .0ςF߿m9|7\{o;S\k{leA@Sچ/4t9c\x z˜V 'hV6$:|]jb<`Kn&8i?Io4G^[R)} \,r_fg8*_'*GR)oǑV -dJT$>P3C2Ul{][K _lҽ{6u>I% vkF %Ac)wj8kVJ GOv "T̙:d,8GN Kfp988g!Q򖦖Eѯ}|OD1<}ryZ!dM)*]ζ+*nKc\ي%gA(!tSpd@Vj:_؜ I~~XʼnnmٙJM8n^WRy0ֵt0&LmP/*ߪ+&wXmӌEK=ɣS?h׬= 5V QH-&ndf v>d_3la9ST.)&0+oY2s-mm|&ˌktR9 EF:8͸ lT_' ђ28?O ë=H#>T5BR8{wJ̗!_xEʌ,w͓I&-).)~I=mVdEG5Q}ꩿgGHY)/>fmf?ڜ7ns[iV,{ӕGOcԕV;;Lm5J፜c1rs|n_KL&Y+:8J(tţ FuXƖx\,Yz4eɺN 27BZgRkѷn%Q8d^#2'KO!F{U'ĚSkx}#@,~;;!{obG0lbO!k(?G*G]pDuԣ{KuOoi"+(gʈ?#~61(LP6{{_r*] r:|92|vi8̭d€,{^I .#1zKԈ VuhUr#bl:"p.6 {"m{~Y|+;s!<>Z۝['8˶,ǬC  =toF }~o7!\3)iB>Oț+Z+KjYT|CJi;1{'Q\,Ryv-d؃z)+ގ5|SX5Kj5GJH5oݣEn_Qa..W[gß'qesLZPDc\ LROZsq^{y^c;c^юiߠPdqrY}v|q*t7#&VZc-}si[g>KwTefaPrzn+b河E*ay">쓫K^r|I}%?8za `y"jtBZª UT^3J{}/wUC^`W ޺4E;D}?MtS7 )nyyŻݸ9Wgia.G.L"Mt5rSE|w(cn|TVw"c J ʵ_˧9FҌ?%(%LPpapΫ)YxV@Kػz^zJAMN du15s\T^ \]ũB6k"GI*1?CP~''rE^? _<7q#D@r9I CC9Ga&2Chj@XYhXܠc|K';aS]wJ`4ACMjQ'*qYk-~,!瘡#-o)S9JU_Us%C*oA=xnվK-lDJHqEE["_y<#4R;Lp3{/n\H[* c1Iڂ(gO[tv/do*.7fmTdvy٢|SBIF?%E3شRn:"daWOg'뙶ݷc!ɠ$-Tޭ q_F6LR`,__|-;% Pva hLJPoIKBM/&" q5DLs,(X.;N?;$~-^+֔(q.3pXjca=*:J(gRPq녈MwRdyrfTMܺj1_ad>J99H;o g湇NE#ޟH C8,qkE'=NlNpwjY~_Qe>4&lfERV2@2[PCF¬znI}#3 @y F,%Lv;E}'N̓ؐص79}~(Z8ru_Κl7X(]IqG"e[u2J4WhAcw7e%6߿#~q|!fⰎ0eZ F@V2 g'E-ЉipP1.M_VT8F~Lt#<|>:c ļ>rvS ֛:'cE@چT4 UV zw$ފF߳Вnzq$׺NtϨEZI1uLXh򆐠Yrx r;6ټ 2F!ޛ Mţ{猲VTp+6[1LTŞ;G/o-"mCΖMa Z~ Iq 9H6ӷ*( zaT˜/Q V8|E.`٢ws" `vͩ혱?_KSdSqq8JtX' ;9]ҌN ,EZÇIv"no”qkC&6u!Ȗg} :`~"EAӻlDv/9[ 6hM)tK{DօڰQC .oFó :fwE ֏^Z8-  @Zw%(!fq|x2?pr>Eϱ5#N? elB]ҪSq3{;g}*/uGN?Q!) DRiV +t`nb0(.9(U6?&nת׷*ĿC=XafRqֲ6SъS%{WXʱs+ lfw҂N⛸=tj:+0le'G-ql~]Ox]ΉDX ܌aQ 㭎>o\kGk1DW@C,&]fg";[#"7>܄-UR+`ޝ'M2;sExWO2'Q!܂({=Vur[&Gd0 %\og $'r~kSwUStKJۑ{Y6Ew^*EN^NWxY`EģOYN#g?C䉋~Ty*B>2O6y>㴷?l]) EM/D_hX:첣+7].=,ڿ d B{ǢhqD8yRi% :X3S0ZsC;$jX#ߖǤ[JnF\xWX$ׄrs+{[Wg_ %Q я_ HEe :y!e՞,h]h;\: /suު洺a bO𭼪$tu:^ۥvQ֮?5[޿\|ѿQ:-{ao d'xAUX&6gB!{Dҭ䶊]zl%/6ZF`NK4.QLbqtI;=aKvN 4. Z q[ݼcm,os4WW4"Ƴx鉅~ uaք7{mxidq/ (^*a}Lw55`H`L)kZlt>vLG  in1@|+ʚ$f0_ "^ݿ2' r_F F.d tLUjK;p}O 0.!)b-zJ5U0z+mh53)-\4fixzXDⵋkq;9U%om!joV_Rx9qg9ZǍ,'Ҟbc@=MeV{>fE saprcju\G>4!B2;ߥ#nz֢,Ip%*IN?7v}G_cJ$/xuX]E@h+CO _[L)!t嘞揟4-Eϲ3ZrGo;%,CZ<,]3ȜQDeݩ95"NEkˑrOZjqOZzkC:6B (;-kry! rG5sryf-yY\6}uz'jhR=!2nqEY">뇇~wB4JI~}TWno9pPױ]%lӏ#Xa1ϳ$!ѻ )Ucq~`AigwG?x+cs&p܎JpC%uXb5h,lWJL%dɇo'%-I>^ͳc>>C]Ŷ&&a=;lT(enٻlЯy*☌\OxSD|ͩ/M5^5%Y `!`~jhe}B)0tNz_rNޭߛ"vӂPqW+:g/ċ4C)I 4,gzƫQ?p|su'CK˱4G&s+h褝R{}Q޼Ʊ%&l\oEvgC~%dOAq׵0,YvU ,a5#H|<W';7` b=+2:8l 7į{n%o"mZ % ԛ7= 8koI({QHK5d(79kZ=prxKKSrПx"9!lo+M}&w*XL"5ږ3Dΐ"Jy& nsz'mebA)N2-} Pgq)?OgVA%v3%Tϵ=%0654W\S~]a od$pI52Lp#zthY|);Q;r;,ߥ-g+t OPsլ^3(ÕD(5 11nkV!S=Zo-VDW3N )Ľ=aswB/`g1&s˿ ՜?W/ӱ5y7?|[::6a0p˅U}SvSeLt$% Yq|+jL{ 4u^7zuy -kگxOlj4Uީ z@['U>i2ff#c|q ў)Ij!~J WjRyDŃBDEmWkf %K&7 WGRu>_sB!nk?]OL 7EJ~mU|=h<@e 7>zȬfP.3bCrPeП8tZHz-_W?NOtbq;pnݳ>~/(O&g(a\ Tqۍjd[rSge_$̗o19e:澗y?^,-rC!!f`-SD  T/.{98DT/S4ufV*Ou@)w|:ڈӯ?q =BC]5[!xm@ m{ kj_o;sQû[F(poSlT rJ~ kIB[Wa"3E߽3tĶ:왣2ݣvRw]t>r$CT68gz/e_4Lj7onyN$;yo}juzz7o"2)8?.X2Jxϸ}Vwm!+.MIfcf{Y? 얛 8gI\Z_.7k.X^%96RNWߥ1O\ͽ=qP{ #a^?F+SQrcMNx+u$j#C_ft[(Aq$& p?fS3sQhsFq_k*9yOd֛fQy]iBO/Hs.EOfn< =-"ï3X"/B͢FIO/t Is /`4 _n"S*S_0O/pR$d\cyqdW=͑6҈~OM i{jT;Rp4Ȱ͂@}Qv׶p,yUtԁ] bwcAu)Y_F>l-@#39#b6ׅfri|ĆkyhJ0Ao6ًCzc+3d™]B/RPۦ bx[,,- LϺ[ÁC.% "jJg_ In flLk%wB=zd)GF}3Kc{}JRG廛O1q6 ͈tB~y'gВd (쎙YV:G0Bg9:i`VX=* (s%8\*ٯdr9yN7JBi?(Jxs ¸8'n b׀j§.zMA bҸVU8#=]y6G_~`*]yC!,o4|2ԧ*'.D 'h 3́!aJ,6Kts3ze8;>nVEy]gI?*&ui`~vI풌Z3YU[onhn^1fmڅn*9o3/ZD+mǽIM./`_bH@q#Ӣh&)=N k`(Lwݜ^O*DBMixJ]9=MVd02|LԒI zCFWAWx>u 1`aT)N>F5RE+ZLٹvPw4_ƝNVui!%q b-3Y"bwq06b c|Aظ]xRT룔{y& \1So1U\H-c0V;5dU2.my[f2N6{#?I9Sd{)8?Y[ꯟbU^5|ģ;Rl 3e͂ڂ 6#n}[gVf螢`p{Y׻X+}35jiCRd68ƔbS_o42?{p;Gz/EL%fO,DiǷIBסּAV2ej^FJ~)%g~r]'W< ٛړB 㖱w&,scGYi4&Yi&tie|A,Z;X| RLe0jfhCbA2QeK{R$ fa-]PD../O-!glq}23SP2͡hHr tP# U;Ek-.'-n(:8P`LG`T}.>`o\}cى֒]^_Up+˭e9s։:h9A_'5b8ݕӇ橦#I š,X[|I[P^S繻Ux{BO_N9ɧX٬`^ }}i,lh~AfL6,Bf1)6+xȞGCЍ$ "|1jH}J5+;$W~obw\y%><ۀ&o7J 3Pwrj ۲"1+k5kEЕ2E]Ԑj1ZϤHM3r)jVP3]2v6I " ";rLZ*_<9~]lH w}H_ o/aԭKIՋ,Ӗ̃kd!ۺ?dZεtT2MĘT4;.gL&XloJUp_u -jI}M/jW۽[Wl@~f/S%XLR6 Bm c(mN˫_e{ ^knң=ٕZɹ\!Vһ;4Xʳ g<_ mԐF1u RBnLf*گp<Ԩ R,?'z6 |D7?367zrSU8ghN((U<bm:-Ns^7{¶d?@V]o¡6A3 u^(e4̅y3\vKIMl7 #$l)~.W0(O-̬_ q4.?XFRHނhu&\NڕPM+vpcTU ]GJJJK$(~;|n;K>;Z=Nn6Fیv$boqr /I%Q;,@Y2"A5W#軕-wwcq/1LOdV m+uvaZ-u_ν22 IXo&WI~e] P&◠{B_g0'jAMz%kf.Vr V_[*ذ[Y=j,q4af4ҿww%`ګQdSo`2b<'SF4t8M8vcV4$1Տ_SKV@8tۇ-si5&L73f38ĖgңU$~8+YURySΥS(rU?3z nAA~`gdS7$" (9 W0_%UhIP ιZ63o4x"+ m'h)䮚mzIm0\9 sWu!+?} φu)?X1a.7{2v찍{xz {KgUXw\ǚ'Y@+ 6l'bzey.|IX0}ț9Fιd`Iyo,T,<G>Qݮn4h镠;$jўAѮГչ.HmXڼZG{ip JbQ ,hytLXGwYlcڙB]r~"vbqϯE);_uoڨ`&GGn]}.= $Pkʤͦ).IlBSď_"9Q,XN({h9S 71;4h.&??{|gߞ'48U;Qg?i88]?Rז Æ]DZ[T9¿<@Ŏ~#67\yϧ %#,wyR˓xinj98]Mm}MV#?WL`gﻟvuQ>!@0gx^r)Ӧ^ۋN{i͓6KC< ]I tдe b] d|5a[w璘W~8`h4& YGHBD?$#geY HN;#E;帼K~WclJIikJc܅C^v]1HZ&EiQf#T9g 3gio}Z(memc2 Ȏ76*@^. ֏L;?y(/TK m z_~ѣR^[x~29JXBsˎȗ-cлҦG6 Lv8@dn ^Ga+>oOFʲ\} _YɼIɡedc(Ȇl%PtqZ?Ʋ )EB{bƠA.Mj~;& xBkgʿ^\TcXO4s]sԷ5#lwԠ&fMSv~Բdԑ=M N@G1UZHBlxH'F -|GiӔ-O=oJ+)=եl lqw2&iֳIǟs fOKcPCDF:SiM뿐 _Dd$c⸕@ #S+?;9,1D7"h9 *p3hd Ȑ_q1'ȟs{ ¼ihN9DYDt;=WI}i+VD]7~{RWHjU$ 鼂l_àQ-lT@`zbI~UDrɲѴܶ{C1Y]9Wc,hy\#IGנckbۼ%ϗ?~Y GVԫQUʕ*6fIlfOMoK-5aEƦΎ'l8͐x?:gXdG/@0kPCkIb{oy38cVcIgJaugp.@!/\)jČ@/Nt,WV<fʦ ?qPCBs17Jb*X52HYcH&mPHgͰύh+ - w+!av ʓ;g:S튮ZF!lv'-Ļ̆+<ەޟEi/8Kmt+&oZG3&<ێԄS0Vgs7g\jMCOD3[: 8Eثrӡ)KnmT槃]a_z(c31kvEalS?^.gyFCYF\koE|Ve [KZỷ@S ȁzd>6Qz4SM#.9]f@S-;'lm+fMFhHVa(v%+/ɹ?/b+`DŽ gǖ{l(=xD+؜JR13W+4n.#y4lF o(1B)dj m<J~G5TX_QIC@jeTmhs9Х1wg{Ա-MP=.t쐪5(6diexs e47أz7I9Mnmla<䴋RR *W~iL;^8F]?,Gg˪Yz@e3DRm4&N϶o5*Uo|5=PvXn 0`2/j(_Stֿ" O' PǀGbN]Z0+[@gIf]7]^6"bj7Op5'F~$mM9;YCfH[j!(Zi)Θ̇=jI3|Y`64@S|Us B^68>%亁|t}!;]+/f{B]ab֣ FEٗysFrtCk"fϾԝWe#P`?byZg|DChGIOhf 'ɥh⡱t?$)0onٷ[W`)z2>YNb6`:v_ߛ^h4UYL޳vT:X?-"~ 9\D׭C3p~YT/}'ߥzədښmo,u:őʱn\mjGUwѝ]%6<7o7>VJԥb5؟^3Eiۥ$7{ 1dr/xՊ9C{Y[?ሠlT1Keݜ! ׵&dJ5os҇gv#;'!2c">qkaMQeW!zGU_ߺ6Xm7*[~ {XÈZF`%}Qd)EކؚΒ8C8۝eFꛨ9F/X[dVnj%'L5vlM~]'?cZE1Ujx4^f<`\&`s輮Qdi@+QLމ}zG]vbMyɯO"=X1崊02WGwR9ٜ}_=gC[z CvsW9ouz]\V k. "~9&0zNja|W#oބ>I 4ߓLN$E*8g1jP}lp$Td*A "}hcRXzIӸtJLsp{?c6\ 5:ޗ`F.2@#:3n՗UЯ^Ih[3gsEQA;!X)q¥'x:.iB0 >c?jȉsO%3?1(=N%i+ 3|9?$<ؐ nYXl7jܭ;vLJM5&bz' r}"<#V7k@ߴÛU|R_cMuکk6 mnX_H!p U@з:IY>{: ;g9@new4O'#`Ҵ|QĀ$V NxPE Ko[ۿqk-q/hUGsr[_#g.zC硷.i*˞ ߚMhύGЯӑ:]X5mS /WŹ?S kw-g5KgV=D+:;"aT;ٱWćE}gc 5ö?\Z3_M$%ئx:;ak m$T]}ǝ۷.k;Mbs cOݻzPcpcY.tyM'zeSZ ނhpXY*a3l/_4u59,$VtqfI)60*HS Hw s-dGnReY3 v1^o2XY%_}qN7أMTTlG`q#+ d͠C%oӆtA^ 5,ؘ}W>u eJEz *;DnWƞZ"_SFvb. dQ늊DfŒNxBK,taA[Oyp0KF{,ght%])*ؒH w3 aJ2Ε'I7Ta1/3}3Rqf-zS9QvK,- Γdvs;ln{m-)G^'7HijC_mX@I/A)}I٣s꼘DDzLo5,<OϔDU.\Cg$")>DAlD+VL3PU|eazKbTBXhE2&!C 1ٮR^}S ' 0kS A+VFo#|;Vl&nOosROp13Y!A,?OK (c4%sC +tQD (JW: u %Cvtc@P3 ze#"JtT$b<ql!qU!~ LoXH+>dZz(~!VFymϳr]jpSDR+ǿnY fkP\޾b ʋF&4NAԀ{pPՓQ=k*^c ye~zԅ w> <"|ȕDcY ?{v7z-2+e%X̞(={}/z?L< =fRoOxۜ\A@ ^"N9J&!_~k,}?4 Yy?Z׵1ߒel}N1l@˥]N%x޴X~D% = >~Zūbp2Bf+Wҳ8@WJK6ԥ%܌:gnh&CaB|D ;u[=`@9DP[&ґclQ;DE;EX-YJ}EC.˯Dp:eN 8ݪm[tgcs|G.¢^e|kD#0hO'.FT[`ۼLDa. 1qɊo7KVp(Ftqa] c$qzU2 D 0 55=moN<`uհ@,aJ%v8y|܂C8ف eE_Y,4`u>@^V{ΰw$| 5P]A~VLRiΧ%dG Oz$.xOb+g>yk[.] <6s{|_OXBM{:z,pΪ/3ĚzZ)?ςf$J@&ֵPؼV2U+4qw5|uǘ"OP h 4SN2њTUFK*f\$nhMVckqv[{Rř$wϾWg[pV6serdylIXaVI.YS{l46ViXf5#/Ow i_Q}"a2[ ~Hc8ҎZ}ڡjo94ֻN`<r[{%|X(xG iMGUgQ .>D3#m2&I 12H52.Y-1vHi6FK2E"3} HƭJ="po@W"ׇ D.k쬲e"=sLD 7tBb&PIӂZlr:Œ> (ݪ*Q? a; 3o PH0Q[6U(*}+Y):-=t.Y1O.dS g$72ᯁq"/-JFWd4+gNZw4+k]'.( ytGC+9k|.klP9N"7.*")UXmGo87>)RtR- PUl$_\d7,9PX0T&|\I,3PI8P>$wTGl/,O6_?uS_?kҭSY)|*BkhIHEmgȫ {:\vgqGwM{σ!OecJ/,>'6^TԂl)D/-8\X:+rv)etu]ItSОShk şn8"c5/RG$W`?s Ms.f<_#3FsF|VC9㎗pm7=P~ !J۹[s`*SjP)GMkzap+$FhrmcY6]idkAS$Y[ɱ/:f 1X9ͤ6Zä,'~@ D3q"E" Р@RDDZ8$.D "E" 5;blgg77g\6R zxY9=D-ך-!7Ewi"Lt] #Eu +5P(jkhn^<. u]FTW , gƢ~d":֍.t^M=w;N59+(m&X;-5l_PS:[uU C L9DP{c! =Gml^;^{@oZ?cҤGYVG_z.,u(Nwd>0IlqtzfӸӊ]wQ19f.bp'AQk2\ WZ*XhTʁ^}Ƃ<P/`;21R;qYME fU*{I-"<'*j;lܵRJn1,0 ÛىP=C|{0Q=mUZpmԕVЋ9|臯rn(TQ#rNxM]D6^㰞BGtx`!-WB~֐}^]lqFoO/\ ;ԯB@uTf.A;>3‚\w7ijJ$+r u A.b5,  rc\KfQI޴^k48cď1>{?LJ|!vpP`D@7fǸ d-z k6`ӞqIŁz>_[lqψ tߠ`3g˵F?kfOYx5?aT9On V'ruB4ctb9 @,!ȗu`BE K8Ve, p.oaEOw Ic$yX{ >]^c3 1LNgR{ |3|O_%eB:k2E˾/g,1iJw0)eΤ ?)xk80ÃGY\EdR,2B ΜTX]5A/b/>.U;.6E63}9kAO#艭^=åqDxUVV_y 3>hݑ}!yNE ĆP-y_=>Dfӧ8f OL\ZBpW jH[>Հ!K0dD Zx B7 *N&ȼ`5cyX;k'ڵ"IbIdMuna~;HJ89_PiTp1hvzI=7+wӨlvBJ\`sR[2kf$hslۊJjW R<_҉5n-/@_BL-&&-ˢ$hmȱ߈? T`D@b$ ;EnX.WbEiM 0_*T>oJ_.t($/Q7 M >zB8pNlECKoz"6 PH (5UnjO\39dnՙjiQ߂.~//WN(2P@3{z-aB _ 's'Z΀a턻{=7J#hLdzvԹ` Z494ct1Fr{7 = !T|s1,h*sIg1p~_uYR ;{x[x_E0p;"A@қӕ= = %`ƒ-Nb_L˒N+Îj09ڴ!hdn@~k˅rS|[fN!4-k`DAm ~@7n68G*\FEƨ$cNh}F4-w!~a\uuY^ o ҬYU7 7BX#M4Z=-yN/eDe =0w>nax>}PpcӪwH0>ijΉ?Iw3"cX|ȉ\I.=.T҉Y TcPPQU_5N?3ŌLqqy߈S{@Z f%PČ vV{fm9x*9Xi{%L A.,9X/<ÿ!@ɧVHo|}-%&ǥ$93!lfdzG^(ۡ'(n #ݬ^S*[}d,v#O/"-gPT啣&Pn1^6qaH9ke:m^Hy]R~t#֢4DtDҷxTʆkܦo _>jpՇz/vh<@J1Gu% TIՄ$dؿ$Qtn`3F s6j+2ᘹCtrц=l.l[s.ꯨQ80Ap®pAl?/ޮ6h#;HߗRRP©w#$硚LJ=뮧r8YIvzEݔSә&q-{vjP)"l&4yo:>+^U"5mZ$^U"j@x:%uu\ ,ܐA tFSKlZ:of{_eu-0mBanHx b*eSm[m7r-؁!U'ZG BDZLlJ`4'ctl]ۂ8߁G %NAoe 03 2T=cv_uR'ǣK@F;45@S0 =Fo`=-6 ;z1$ː2zywEQS ',X v fww4~}/J^_ =Bͺ鶷OQ=0Q1;f7e Mܐa]lC+[V>6Աt ^gv ֙K6^}~j )7nT\RlڦGb/VMPəUF8Q,d '޼>6-}MKKp|@H@IзL]`}IK bx@Jm}Y?a]E3LUbGt*H}w }e-D `Hs&G((s70пZ|%q?a6Ci@rGl !k03^N3[6#_{\o|#G$z׻c'w} ehI]Ix)a\\M7Ǘёzq6"\MXkRT "e\a5l>f k6җ5b9<3LɊe3fDyo9D  f /5QWW<n`ijr+_H^T#!}8ЯET.֙"0sFd #ڡ ! hvE}.#Xf w{Ǣ)pGr>djX}STk +%(Y5$bf Z?zEy4tUyRgZbp(Flvny j_E {br/VS -ov/@ A#u?3͟f¿c S7]v[E_ጼAѧ;ŊbkY<|": 8RJ?<[MCǝJӊ鞄 HE~,!>W'Z(۠q[?uCؓW2800c*!_@g"0M'Q¾L{EA[e$ kDb/= 3eGn|Vu gyW(>v'hFؖȨ1#.L>ʺ?M673 v= AݣBG* 'pL0?HLdTK]]K'hcvp[=xVhFx8Ixe =35OV#6zu3ji_D[?7_CS`sN`}wQ9~-;x:&g\?8{Lj͆D wPiO{aB")IJ&&xC^SΆSuќMGőp!CCi( DD@dS92%=.yh5yBkAz{;v_1=6% 87Rwh@R?2`F3x^h0I7!-/s1)ino,PIDDUgj~|ץN§79[X.8".dyfO  ȧž^rqA‚ cSLig_H&߄T).) utmnV ?@,(0' Þ)pʣ$tvjWqe;k7eC/QN7coB('\o8;#YF~:'ض:&;}^(>3MYʄu[SN0 `4Wc@JL͖F۶઎I<T4EЦx=ᮩ>8&Q_(t6 <\Dd.9NՕo.+|t)lwԀJiioPY}EٶV̘af͞B~ρet#4Rٴ׭: Oext5cQƣ%"lĠmI o\xs\b)G~#t o`\W,]W2INF mQB0-V=n6 hH6jXz09ۃ7C*aĮVꖘ&҉B*.>jabi:ғUjrL:>EBX_v>lDŽVO AcS*0LeBEʬ4g=jy긽ܞQ-pW,a+d οLzn.g]bm E1y&%6/zwwq!~ަ@>.tSJVRqyE,L1X~p2#ϘҰ"Aob^p໮9-C%D~¨jp秥I<ƂNiLl+G-(g2ti/_+ cӆ}uMA??eMN⼝RhmFst^޳|5tÆw% צQФ#W T RIUtZ8{B#8`'@CewIh;}nym'42ڎ -u|ןe&yL0ɛk^Xh]H#Z6 @)?|] [Yc*hdn&qʢҵبkx\I/M;)X? E m^"f4 TA{ryZ IY|ѹG5"Ew񭑡aU{<""(m5>/գwCNPe@ "KvvT/oQ7ZL->ߕ{C⼬G6szK`BfH'u߳J.\dK:ƨ) lޯ)i @ꊁәΈZ]z sYEf%S .̕TYUr2h!JRT֋:#]mk7cޚe5[׻)?DvTgSw[1AifIPa}#҄yb8h@cSK=_jXhkK6ǹzr #f oIPXE>xuD=KmHz΀<`5!,4vt.S\F+w˧5ml._f p➳;(]++Ź(Js\{mBW1 o.E"x;:XQ3Ñ޼smiHG/&j9 X=o?{H͇O_-CdOlPߍ:q%Z"=Iь8g1ьB!wzEv\"8h__r4n#Xgh% ]Рs}ޕm-Sڲ,TL $*_ZZsZJxVd.xA3r#Zư!Dm̴Y*II2~*7´Lh{|fdN?pzhޞw!FC!0piwZ޳4LPx ǾéyN\jςH}0 -pD”n> ߋ ='4b\D-w.:~$26 ǰpi\]h 1wJ9[͖!"s-50 uh@SVиA YO = '#z'ko9P͌ iEν7",}^5&\zN?["^ -KngyO,ӵ-7'2NV9&Z~璔+f1r?>δ-1I@'ԯz<<ϑuẻ8 *X~1] e]ݨm+Y y;n "+冄ՇUV[;czX PākxX; / Mx!zd42ҺP@[ .p@^DhB6204p_65v{? k`aw2OHW#A"JTl W}̵T]c<9OPZ9)]^yRIz5_ifwJ](& Aglw()]pc5Otk7opOvU> z"MwNCŨBANp?FT."JC@Ti#g1M1 dӟy v2\eiL$ )ܶĴ2]ڮ`$WG#I\4v0ÔZloзnO3+0W7g t q>X]l %k4Jʓﹽ^К ;h_GW\_M5.zn /7W8裮ZYEn+Q]r9 G*4sS"bk<`xNsA~tuD O .$0QCw}Dw#zhy@l^Cdw/Ҷȑb[ 8A~;ތg>/v^-:ꭊW@às%yt Psȑ< 2kS6+{P~*CCGB hnX\un|+^O*Sīj:1m$)@HjZ߿G| $UA9|oqL6DJBfH=bpGױD4EGNҲiS bXYqfSACCʷ⨔88 4F'_h_sh e@?⹪+/dw8+AJ4VZsXDTb.m7vnKse5*;wN}_?i9*[2#&@N[ (/*D#Zk,.  D@^+ٺY DDwjik{A^W  ""-u{0"&Xx]P DL   ״2V1u6{AX$^ehG;YܚB"?eU1E$m:pܼ=:ɴ C `$冯 Ʃn/{oj7IZG6f([8"GX]]3kϠD^VެB9_2ԧ/U٨磳 3̦x6gqizjm }夃3 En5 qPuYv`<`yyn!&IgePd\uTn)!8R-Y,5 A pCm(4IXN67㱡2F[2b'""3Ȳ73 9-6-Iq - KΆɳ֚'{h o8!*ڑ?&Vް7eǬ )+>>Gu"Ox _WeE!&?{נ,S]hA>OsdsvDwG:mwUſݢ,f o)`bI_SV0rE۠cs M=D=1뾯[FZςlb|͜Y'6ԯ,S=.hSDO!.a PK|}BoUIl~, ,g,~cCng&ZT"C.S63_e!/& 7ylk9oPᏇ,]i.! KY~u&fq(F[WlRTz]ӀrUwDU@k08P1B*|sn@NQSYAG-D `a\\ɊEպ WtH x@+"ye1 ,N^.ov /ѷ E~=41JlRZ1Gѹm6lf25z-&n%H  =klbȳ)WO=QC',m`' + |uDE3Qu]^ Y8J7ы!1_Ankv?Z߭ik_OԈS]?X)s6*fI!m1x/B8Vv;oj+2o"qG~N=wX ٹ"-0k2G:8_yHc;cv#]gRϷCZ4`[Ir8OJ@_x'23Sݺn}#o1*Xild%"XZ\T5x:2Vy/yN? Yx}Kr+ )e}Fē8 eȝq_řDZJOsye>$ݿz.HP| *ޱX^8i3o,OeC_l=[*'t-koT[,"brq7A.]~ŏ&@Fw⺻z4iOt̢FeTMOR!<&u*rf1: ~; O&p٥!"YTcq{yšo^B #GX,-QKڪJ`Ye00&$*/`X@[܁70꯰7\kΆ_Q)*;rԾ`o?/$7bcs뿖 ގ:+ {HLlp,tֺc)J$AKfgmd}zrqEKC>9 u;k<"23hWV:k=@Av*4o5&>ráNF=IlP[Ljtў>c>O- v#S{b(;2~՗P VY$ӂ^ 2,'_؍ uPK=O;m@Tn+Y[z!a̩]31!0;rX"tq/A16J:MƵ/O3g%@>^&f"1I*>\_$61b$[R{ĆHCjfLV Y29S߱i0=r+*ԣ#d~ūDuVȁzJٶU/<TlK Dݱ4mhQFh:EX<`AAAE "-""/FodžW""tFUCiɥR7fctr)lgsE߳]d̯"o*ۅFϊ*c7lw.җm#rQ q-:(b(n0wnTh x4\]=|nLQ[o\+ca8QߓZA SF Y&a7\!)]& }ldwpDtϏ/WP9n H,3ˋ;;sSa;.{BX7۳uWѠq)󠒛1 qS`zv.N\TgP8] G0֍U+}*o8ǨCϜN̰B 4?g,=\NS%1MDdecIꮪNȞ VGrk$֫ {Um:u|7vƙ6]Y@v h<)x-n e)TnwZ%uNҿc酖m4t(r+z$f,= ^-\}aPI:#`7t!΃p`._ѰAԠqL\G3/ϾG\ەO,gv1h=yy/H=SL!X:# Kfp8ڇ(#ٶTŨnH BDͨl_{uWm"rw'Oy(i8M!űX =@21Kr% @SpK:r簖sRjiv.]݁%Tɩ";sK9GʹWip cG}[vIQh%A\͜{R'*Uup*vX$Ӻo Rԧl˨#d)tksPR ?Վ &29_E1޵PEkM|Ti=gdTS㨛૵C4-bcG{77YJaE0_o xW<;?;-6Y@""iX* -BVɱ?lWgz?Λ}+ ãݫom-oѷbOˋW۩^8)iU1K7 FPL'iK_?xǿ /CalSF?0 2M ofX!-$ Ī?F΍Dֽ wbWW/q>?l18t ΜyŠ0m{SlI -h?6ʺR`Q <`aX0iO\T .YD7DY \OCC6Lp)qDaf06宾x`0CvƐ]ioJg+2"rFVnf+ #|qsĪF?-]>ذR yeWޚP+h &XYmuf]tXzS߇W_U+ U!GLCV}LK _G?n`H|"wi"!Z7zkvRG*V9 ƛX^ZILSL20h$*ɛ=ޒ'¸s)bO$_Jr"McUpL7_, ĝZPmh)M>7Aㆢ}C1BnC{nK%'yGPK}=oAl@ "" j:y<n)HWvPhv5yD^P"fT|4`z*я.Gު?f\\]B{XsSQ4,hn9Naz>Ypm ; :V;RԤ ]0H *^ܵ}{_Z]K#0 ?}h1iM%XZ^LPJC }uD8.RNܞr++$4Lfr7~oYFrm aXV8YՉ kFORDTg^jg5qvW8ќ*uC)ktv05Uz[H2@ t]NDo(Vh5(q:sh [o A;I[X9cRQIب")lcF'X@6=wPNj*d[s2:i|-w3}"6:1e)rKu(1=+Kqg'%bͰȼ";1tJ4 r~ȝ#rg|jߴj_{_I{Wf%Л狺a0a4J;ʩ$iG3;Vyu`ˌ5. 'Ik8̸UY+7OȅQ1ܐF1ݓykQ]F_[%6'\y\S3Cm Dnb8cz? h`VEp͙UGr@LCˋޓm\w9x͎~&ǯZׂ?@)cifWM>gx;Ő.T!` R)@,&5PղiD}mo5o ^]Y~/LU5/JEM4kLguVY X_ԓUN$>\#z N${Ԡ/ E }XBί 60{4nڤ^( 4:'KJ($&J9b[aila2񈱮*s&9[nl>J,u s6J=XtgH,o.} 44ݒ'DK :?(Q?=6ׯ-p_|(oX&ǔoо7/FϮ#|N|DAߑ#T:`u| ZybV?5E E[JהLAo2()XRf"a\8)\TM˿TUaw/׮j1-)SڗjTCbakxӬ N8p"- wa~>Zсűn=F؈ɲJΝ +?^G'oϝ>:z:p2fum#AfZ(Gc&e S mh;_S[ HX>qQZϼF# =5дVeUwS ]B!~,a\gYJPLWm]#*@o !1{_ΣIhƐ·;~zQuJ%xji j[X\>kf!oL]R[uXi.{亘Wׁ7WUi` pӅLF/ 8qd+\#PD@~% [!Ob s`3$h4\=ۼ=}z~! @6]+Kp-Xvn~(wZ؟vV)UX>ܠCހ\Bfj hQ@XѰn?_kLCvR@BO[|-Er8sU}G?%QvUle3FۮNv#N sW`J*dc}< 3 d!M}/_- F2>J Ģ`E|$z܎t"Jhns&*JӯrLſ 0 @l'R D^rDFDMa.9țEK2eý ,7Cs_x1[uNJd 05uPy+K}%ND+QuG^d^3b`O*i`K[ DǢ!DcϺVI351 9(g=z p"n6rJe?*LJ_h^x# p71݇l!L3( Uqo"r()Q6Vp@w^2O ' HMfsz]AqqRk &B_b6Ӓt &>]Z&1 <*sZgkIlw$}ta2$*>վ\g2B?r)Zm?f}4{p+f0{n@%m,Ew>{Q% Exi2@_p6'4vyQVtlQޛ})^jgͅu1+!g]a"L;*5L~c(=+>yD?[*@ h2ke'ci%奶^mӷДpzx6~v5 4*.ki#Tѱ5 aM~N '!uu$﹗uKbl Uߛ>.@Ž?TeGÉ;q[pCwt4mb0.lHMƏ碔<\9m1*!8WH{<ΐWD8+&cqq45]H1W[K~sV#3@#D :5ޛj&j< ~\g [$kzJ.v{GQJȓ1@,gbn d4?!Z$䶶RNE7 XI+' |Zsx~)ڻA&L~"~Т{o eb1ms )\ޓ!хKMCҪ}c 0`Еl$GԾt P@6J O& d6-kJUSD|5'o޿zͤl`JY.?<%0MɲHtMҫ͝ 4qlÇj_i}e)$kgzCQЊ,7Bе:X=p٧o4͛,#Dn+⚿@6>#[^clKmotmߚ} i| D@L_,]=nR}Jy{k> Fl5Wm{ Мht=e ;-nOJJ< ktgGn(:^-WpKRG@4$0a5Я[yS{J׆K"b8O~<]2^Iu*WʼnԊozs S `R4f+[!@pC\ƕ.7$c۴-ut7{G'YZ >(>fE`b*"Ψ *bO3Mhy*:;$_m@ᔦ;%M*Xl%jV2P/{Q.sFˁ kM :c>Z_T!p`f ER4VG2ت ACnAFBks_*oGYۮȯxZTH׵٧pBoWhoJCX=n,|`~EnزcPdw=%ccŴg9 p6%l&%hwGKJI42g&غ+\>JqY1T. g{u jI$m߀nrN_Z ˹owa̵m& q4Qi&R}CJhNzCt`Eʦ;o<9R7 SމW"\EZ݃|Ȳ1Z,iUӞIܙ{i&|eG$Jf\q`7A$75{>E>Ae(8xU6y3a{O^3)/Z,۳=2i*j?KX(l>!X}dlw^HU|?8)Xi֎!l?v]-w1ym)MxB2 c[tC$߁EڇDވi,zd2;"6"8/:ێ 4? {N~ż heNh{O_Կu];_|.|}ǡf]K>}M'>5꽬:SFZ=U9Tz^[3O] mߧ1$ makR3LB#tm =<߳6>r|si dP5^һ6@IG,jy[ &zBڎLקH>-*x .%$bQJJ_k_+@_@JaA) Fj'Vl@8zñp堮5UseMP](kAC|Zi,H""BQ2UVkZs~̐ DFDDDk Kr]uH[U̽0 zfcTArB&_Dܤ]8ΟGãZczIGDwcBZ5^g}>>:dz) f.䍴F7'?w6c-XQD`i!b{R( jMN*? ~z3͌ϕuogH&3,k"kWiW1Y-XI3Z]x3摅QIYxԽTj?+b9\}2_l}v3d,혱|D;ē:3'B#tE)X00gٌ2ڍݒuPpFnDoj^$=@#掮+̦32Q=h"$Ō "O#;v=lػUyQ@zj({gf:hU=Hkd $q0G{$]bI<ƀW~x+ָl: хWOI=צ3*"=WS(()GLOCy鷢KnN9W4r(6uI N:8ߵ .s]%CWj/lj$dŎ(i׭89&]ٱՆ8_(JrDZAù1aT <=s~jx5>,Kh4N &O`0"@儌^TNBo&zPȀ D@^N""1!Gȋ x8,`t(/c>,L@36%i<ɿ&xO ([{ƻ>EAm&A ۯX,T[&H,vySNKˤ1EyBlZ󗳭G ®@ɵ88HգYFJ^;@KKX>HZ#ԃfoR ^GveH>CFĬORS%-Bֻy1>ĭN?M ^!irI '̵hY-iטj-C'zp΅o}B9n{OYt ӥÈev riv/o!hD[\c -6""n) Œ߄'}p_}Y0mXo=vVJfɁ~j4y=aʙ5_0 9zq3ei3<$ }!xm,ǐ72GFVEx̋nH!QE fJ+c֜;2R"`.M$b=e[WHtVL̴qd}T]N!)5 /aQFBYX4˒Q%{N $ZZE5`p|6Ƨ̶7w<3}&>D°kSKU@I5<:ֆVu" –?j 21D$ɝ3P4H> f=c#tNjR hѯ(c9rdg?F ] @h FvAW+ 漴Jm4 cjMm++8*AJȑ͉;aMGՄjBM@Uѣr!,5zhh@j%MysO[ ߓY݁Ffј́)YSwL[/h`:|y=j @GQ6Vu;_ ]:;/W?,EVIu{a6/j]BcUɽ fS=Ф1ع% S_/DN&ϣ*-w\zzgR235_$?>uZ~uúVU Hc$ ~(fF'n`yN>g9Y]\ @1@d/*2JB-Si'/jeV ,aw=x8DF^H l7}%:riDwG()z[8qd&@ ,IyѴRlu@n".p""$Ej0KDD?V, vl(_Dj й\Kľ۞ONa6o;5VLfA)ӬU>ZNrqY;&uމT<4wXcM['1(McH`YHy:;tHVIjszYc,Fb,(Fc2ш/\1k{VXY'*X5W#ԅv 0&(W``I*Hwl@^%} ^=oG!;֬mMʵ^~K Od'`~0v  4|]܇slpzqQ/AMK-H=_sT)]9( >[g] Pc/T_⠜gӦәp9Csb3PYfA+UILiAB?xO`ArDǍe|yrvFe1؃::O8g~$06 z,?r:q>||8L&m$1-T9~C)̬Yf&L[:J[{7K#2}Śh)FtaJhx;C}?@ILj܅@rG6 #+ g˱a"iUL.b<2wהXV,t\R:qg |{pÎ*<=ƛ]|>rXk[*P;sg~wb遫L>۹~~94),(dz"]ۨk#P!uU=Udk\уEV56~8'):ͿP)c^HU ?d7a;rR,kl"2k%*Kyx v160DP"fG&)H`}?YoQ[L24KzBH KaiDV64#N,1,Y%~7! BӹyaX=c]dmHc`69UJ.'fksr*2%[u徐D1oD,=PL{;c X&$EO5 ,AX(=^tb uYN@TN 9 \5ҟ"t {*ZfUud<%Nv jHKroѨTUhk9Y$I;I||D&}Px5 F`U$1Ohj !CO~䛴柙tJK/\oA曳ː\0ܓf0-nn?kPV`GX mu*=}Qv9R^ī{-L<D;oojW^] ҆˄MdRْ - w7{Cb#{?gAwf7Ⱦu0T\=?cnT8eY[I=N᳊OO&`J-MgYMR2@Ôj:.b̕; &%rY6у~5l6G|ꏢ'ζ v / ^h2E՚ D~oNo:!iI ,hu?p?_M=ms^ ~6YyOƬ1ohʛJ9Q ""-~Ρl]8&`!VL髚L㦣q'#DԵQ >ܮ'őC̓-)糔܄Rެ+ HQQZl~WC\88/B B.`R(4O-ְKT OߵiP޽52<ٛE$ypsCO8 qCg]H6.wK6pPCdCǜCf?L{uD({e{ Q;demVy lcrU[OP[lisAL a^YjvIZ6c Ny+YhPQK͜>~#r@YQ<f.Y:gD (8[ASktj6_;|O P+Y?% ژ3ޥp 5s(~\U+|S^?zT1 $LY+ڣ }#4)Ip]S{hgoYG--VΡc#s5Hk #1]]5à}gB*+oÿ02Hufq`O9] ni TW׳={Ca؎ӽPg0ФR?p4'S,mV% H#6tZh!)V,Qvs\CEA{t?s-eۊ+$#S+0H[sQ(+.((pV3o# #d}=GffM_Zi?fE>V޴2 /m6#ǘsWC5wHHČ_nPWnaKɊ{ItygP.a'fT-Vy25eNbZXΫ_ʂ;mЄG(87*X;$nBGnv,d ?1% % #7KcCݰ7U脟o7. DE?@I@A>+#l@(^Q#p=!2, =nf 29 ݁ m&oҘBVTK^W}uדG"Ifƌ:|6p6@D5]5W NXɑ_yiV $S\n^dDA(u"\sN^*-oNZix(Oh(RˬVu7[lHRM6\gun~h YL,k^p(d`G{dxW*MI1WEMDzDDaT V2~aX 9yghÌ&R>niQ2xҜc?O0ܟ8%p *Yby2wWQ F ԦϾSfFC4LX23xXh917-v$2[ Q5v}3K7^Eyx"fScD*\8δ(H^.QG me<:&f<>dGVc{7,iƪou^ƞs7sn1XqF9iv^Ͳu#oLb1a־bO0_.~A˛fg\wE/6_-Z|5Oc""mx9k \{29|7d'[-}I"+> f`*fR6lqa`gSt'~5e%i/m+ 2oQx3X],z{B&{6(tJG5| eJƼUU n%?˽u[3]w\703`0hK)ӕruD*6|cfVJ$gȹt.lRyZ4 2;hۧWGuG!>K,vsW:X%K_gs ^,U@vBY9 yp0b@S袈wW1abŅcD!6, Qj1X OeI=넾] <_86mJmB ue4 p|ٺ;f{k~ Fxwm8=DD5zME gJ׬X@5C<7bᚇCIz0;H`&&Ō`! \~)B30󠩲W Eq:DPbp*o[d'j猬 ]$ A@q|ڻAۙxo*^mK(KqBT:dOЄAn`42V/b;:PR׽$  Ru))_͇52LYFJЙ< ;;l;e&p]Fjź)f~&5:Zcv^RNVK}51neT=q׊V #8ph4]4N:֟q߷T$UٸǞ֝ŋRT 鷐toQfiyljJXu4ȕT[w"X2@ʑ0OVr#>0zqmw>qSP+mfeU!(ʈf*jjJLFpgˉ@ul8d*F(d*rZz2(:Ei,UPŽ_ K!aFMH;퓐_Qgj6Qm2~D|O?s#UjF[SD,}..o1bї~[aǥdK]?9Ǯ#!g]?i2s"bzF!t`%Ȏ0#CV %>fen̚Iru _Fhq)s< b1w8ӥ>]];v>م;LF!i k "9 U$NLg%%O]FJ˩"nXInN{Uz+oZiWyvA%͟prS*b؁r(2pEfx}xGd9J|.6[WlK68yGcQp$jAN t 7UWt9P+>s BH0(WnN#9 Vy9bnxeM:v$")U[[A`cLZ2j ~D ռnp#7PkUQ"'4#z0MleVſ¦7͐Qk,;ؔ)MXd^iBf~1'5=[/{HTuJU$-)[]}*A$ \T3C/BmK ԔޔTHVmK yZ]XNM2[{IEٮekїnDɧ]db]~8'C nj)^Z$J6f3[Lڜ,ʓ*L.kcZQ5XU""xpݼŝ@QBp;Qhz΍1%mXŲaɀ VTv2Z4=Q bg[D5W4a-E 6e`@ma؞ŎB$FbӬA>Y _%m,:~hʷЂ#ӵ{%t\Y @rk"0p̔<̀Kf;TXf% kuo>Hrtϊ`|X—$% uϰ/5<#3`?=3-ZtJ3Y1TVzOӂZ^9GMT}Uǰ7n"E~P? $~^ 6r}>-}t^q=F ,T̎OíuC 7I ӎ LA+Hj(uceB4|챭bP0?rCaWB$ ]/RyrRF٤Jؘ@,w^NtēA.5}Y,_gE;^tS&z}՘+0&l}.-` o4foSwmJ.ڟ8&\UK0xf hr J5@<((NƑL;Xzs9cu^8vH<752 Rc2(M^ }վɞ.Q@RQvs.T%/9r$v9=J Ÿg>`а J% Kx tlju q D G76_5=kIN0mwcW=' \5}{=19]. 4k"h&Tc"(zUMFrл^b Aº&țor>hY q1IxRA}pj1z]k03;""800ngxaM.r&rtt"oKbkZT͕5n 9ij0-\[3ʋP SBL9Ǘq+41%aS}2sqDf$;/! ΤVbffq׊ӞWa"ܪKSj CPIvs{9^S ݣrF Fq{Y BH{D0ƖԐS^ w+I>@UUQU䡒,N0"f9|[4nǵיRKԖϜ޵kY|@15ZYi]M?A| yI;SX$>IM^R>CwWfRguʗ<dUqo襊|'%xGGM IIG+(<s2y8Y'iҩj7,rψ6SvCyl{@8B.?-I;Dz=!M2c NQy]h(l dAsLJVx FF<㣤eޙ{Gg]4|qΠDj~dY+Hͫ ЈEsA4b=bҧ{1!hu'/NqgN xH) Yv@Rg_ۄHܘƚ1Æ/`O7&A beӽ+Sg>v{x̂ k (WqISaUè}dP>MA#t}TĎ4/O!MF\ȔMkRG2:,+R19h)|]IkLwT~6]3V~u!2EOazVI6pӪ"]:"3$P?PB @͚7:!%Lבog_tL[ss!:>^ҎՄ!?}J.[ {Bb(% ѣ6&B/Ω~]`B~+sBrlch2 t+S5̘@[Pr~cdϺq;¹:ID/}i&㯟f]C,dXūӍ_ # !xKueE}缽Ⱥk0}`ʥe\ph֜4 p(aC2]Dk&h)O/4ssMk))O?PƬ')hd;*p/x ,)\M +J zd1u@VG=[C1L7te& 5L-lL`!?(VGѵG;:xŠ~0 -D! s,Dzur6 d~$$5$!R}[^]/g@FYFBYV;]l[Gbp 5p}?rS%2|g ]%7.XkiڽҪf$L[](yG_fy'Ф)v^co(x7A-?שDZb[9QHֳp?s /Ͽ"}B2XG?cQc;S3mbhpۢ@=\@(ab-1aj[(~J^߉To}rL[~ԦA?ZΌkOR $[5DnEm2PyW_F ?yU; ,K\G/6O lCkVFÓE_6vrxm‡L=aQb!|DH2IU^/f\Ƨ]=Wel[+G>q?XyMhX1'~2-Uaےuzp2%0ˍy(jA10`&w2T^ۈxDBW#|ɬ0&Ja-,%aU4Ή⢱Gʡ>3}Qt 53BI;V<rYbc:"чq8{ݯ؁z_*;Q ߍXrLxHp(& yٗErfOsM̾f:6q&MWZ?Fi*|.:2xSh@J?c'{R_|5:q@h-4vH^ }5%6rU[Y kKA}j-_=<< 5.RrƻX7iʫ#6qG؈Uv幵 [~UJA3?Z~'[ZMk 4HkQ%Tdw[Տ.bIkJrZ(Dݺ1 uocMzdD`f*QTTp^ 3z2%,Qt$4zӵV= vAw{DTS׏kGPa (rBk1е"( fBS2TV̕y4{ ( hդZui>`@hX~9D{>m>e{&Z028>nu~5T 7XS~pV/Bo6GB~*xiص,e֐kSbF|asR*s:HQ2^od TrĪ>J770Eq X\V:su5€"ر泬zaX'־*(kկ&-|66jmAC)%ge0-#P9gnS7@RsƑ  c湉zz*sepF@Sv_{ ,+~G ̀G*C}ppX\1-ҕ {Gāi7n*ӲW :nD \$ F [-fn 7ePk.'P0׼BeT~{ƣC==9=>狟=!D>`ots%g@xI i: J [Z HoV c^U:7ѫ*M,6ĦJ̤C%l΍2d[ݧ-`!֎+u~b.aP,577(1VЇUl4tDLeU>%V<00\ċ/5iUJ6@&ޙH~.,œMσGު٩E6.\K/1P?;v1Zh_aH;E֦WfnD&;NR 3╆ kvDY |x%PGR/?ۍcT~XZ9wQCq4' 0 ㄃\4 cܖ>@`"ӸIyב9vX?? Q'jfQ*@jߢhK kP0@:0 N/߳Qlv,O町;wCœAn~S!QoQY~00l3EF;7HQBjl:Th"MO6yiOwGZşxl̲ܰU (v!8i!sTd|=Z m1yk/j>vw\\uWBsD>9,;T%v"vpʙ|pqV9O'UgVu q8}dRBQ~]!yjޒ"H?ۿbŚg~AƂ&,P @U/$L̶JnY#hEOzw+E:H3*HhJ%sxmp/gL-oDHJTAT- _Da+jM`КDpnWb~Xz,j/Ю5|=.bSD _OB3Sik$/)ιh\Ik?]x⃬7b7śˋQ=I_ fԕ*51<-!=PCQK40@.$ZkœE`( {8x%q2G=9,<3ᒬ -6J%ay9L1j<^?_ yYXRxN*.ҕmkvY#j/biTTj,  v0^c:ƪR'}&Y1HzsGH8y4>׋/sb[sv?{Ld}^K\g-r979?7ٴiO줅rWSJ7/80;6VR8YLG~ZBg ^QD }I⟗0nɻܻ \.߹ŅSw)QHmݱG ń?!^O(Ӿ?laW|ktl`a*Nw[nG=ej%æi"1G3),{|.Q1\ÖGvlm~7z-Yf09'tBcPr/nC_Mbߝ򟆨N[B}DBROVw{ sZGP /Qu`1/MΚ>#5`ASpŠ?/PCUZ=dNDT_@Ü]w'd*?\--FB%ISBom+(J@[a/c3nVh vz7@{~֓iڡ_A*VfkZ&;z} @C?7Heq]ӏ@piZ5x?O}6Xp~h%[gShA&& rieFX zR(Tt];Q}:t9"~!k6 JL'1FSZ'%ڦ+__\" K] ߿A</w = xl*!GrC۳n ;H\TxqD@#J`'vb@(Sjt,z~E߲!NϯPCUwxNLwlQ~ZeE.Qj [G''TxtuW {wXIt>C#ṼToxꍠDdqFP奓*X'XN3sQ8EË|-v_ҳ^ q1 P柊n?> V)YL 7f]v-UaāxH8C`P3b{Y]ZvߧI!^Wr~,SBϻt1uINX@"" Bl_\3iH4B"" Ҝbx_Cm'p^hDDn' eg_>;a-O56ƭ=Ea p@ /|~Pg AƒʃQ23؀!uq># ~8Ϛ{-Fs>'Zq1`o -nmbZYD/yrKIOB@0VG`>@"&q܎x1C1-J{ M2)Wfp.͏4VX2QjV7G = p`pc;C{?g殑3ψ+Ks %l[.i7M %1i,?|uXm|Rw`_wQ*8ߛ݆uGKq̔5Zb/wNrh|.Z\BhQ@>g2r Jd.XuUk,߱I$T=RV+ٴaH&O@e0A0#ԂB^ /I:‡cuPϻA`nYK 3/ @s8SWAТ$(#!%B*z-?im*,%(Y'֣J)g GBtr\6Ug#S*`{sV:e /K߄cHB"r2L;ЀS "^ ;[R/|]wJn].[}2r<[/#`;r-jڜ%.vuF>oi- $08 ]h|Ϡ sP6\A.XcKFk{E|~T/>S m2 ƻ pIkΛ-0^b9{n~\* ,2-vKpUsY~dN޻F,9Nz힖kVanUA*QTq}eϚp=3E@O`>g׶0"q,4{.Z'Qgۍ¸@@4֘;-JO R['n B3J۠]s& `%l X$1.>AH%qy߹=z>RP+&-OE:zsctaEs_o|-x'njwEWY(B (">(M0Ah ٩qao(7C|h_h<XR YJTT^08z=MUk(+ӌ%%.4@ ID $(Trxr?%Є ;8B: ni/ x&0+dPC)L(ڙB`9r!+n1O\ e8@k"dPsl4#`ZU1ëy3E\?(@q0 XB5at," jJ>[6"x#1|kH_#F dKА*y =lq\X;`YC*Q^ _nZsFXZ,mqo6An.!gsxv9u[c.e­!V͗b"{H!6ͤV+CM&TpB2 _[~*ҘXE)Z5Y0.t]%h$}+}nMҹl>b Q2d3CmRe45xfp'aPPv%*&eT O&T=*"~jEC]-&l,+ Pa,* ڙbez?ucV,W{eϷrN}*i"ewз zRK感BOc+(xi:[ ;ۇSf6)s׌&d(`.P*qg{uve¯U`~F>뚶?E̶ JeAyNb%clqMVr`_>tlUbJ8iuB5#8hF(0pIb_LT>TT @go yQEAk^ Kh+jw2%c|,[ERAӻ;)|nޭRAo^^K /j)DٕO@@ASLPd'e8Q2X\Ȳ1anhCjW;C QL(}utuh8$y> й񽼾+p)O₍!LpRnlیչ 7ǥDwZܱ%lAOc9z&LKYg,|r:xpFP2<̂y1u<>W%&l"Fk[p@w2{WNӨ<)zݷSD\#:st : 6?:?<`Zeo(Ļ:(;bs>C[^l`9H}XZvΑp3T/{3-sQȈ`$wn pd:!FE$޷l0$B"42@˔1\~b+ WxuP*TZ`^0ﰏVreG@G)78~b/ Re_la5+CiaHp)[RTI t {}֑wkKN+)n)%C"dqqdi/x r^2?PfbTbN/&( >_I }=s1qNxi0iL{@RflI8 ڙRFh'H)`: `'%dhP? 鐵{o1_aeӒ/Fd M St" @^z.U6\O|Ŏ⵲,xw 鬑%ml(؍ rYB2 x0!s;w+BFl00 5+=e@G*60:|ټ<m[Cx""@z fu1{X+Jr (-aR666Qg FTCXWfF&l(-;F8L2eko!hY  ~כVh=sOqC>ګuGLi :FKBsW #๏ 7ɻwlw8Jww_8KZXnIsQY0cHd͒j1 w OubWuꑄfo鈲0Xڮ>aQJwx |)K\i#ƀ|2+7Doeۋ/ؠCZ%S7 /XA{9kʉA|PENG{C)A/9Q2tV؜&00S?т]rp<[TF~~Ef/\aD׽ .+; 9]xaPkI$k-""iێ-y&^Ǝ?t+^i"m3}1>uwZ; ORCՁHCQxElyP8oЇw9x$@]53D`.Ӧ`uCt@w`Ƌ+b1tͻ6lOJ3[dz06hv^.[ ,U'W84O}KNLJ/2(ԄqCLڪ5Rv"=# | CASXL#ez>'<6G}DlxĴ_{H*; ڠGM^e M zt'4kXџ:TxoKTf`R=90~:4,u혂n/3Rd ,'=ML+=?<)t'Y>N\U`Vyu~ u{ڝt#8q !J+D܏cdDWw2JB5T #fg(oU>[iUus%7īȦ'mj&.(T?L(/ӵcpkvrʂ4?|R٠. dCf+0 3ww@er1w@IfT+r^Ժ[c,SZ:)~)R1 }~ꡫ;)b,?,>?-Gk8A0Pv l =.Zdz8 e4m "șF x.Q6cg;*WN;`H_}k8RIsA-|pu[ㆇW֊3*I> DUuO@ofVmA<\ygC%ݍJZ勜V>-L=X+Y*K1)@T6ze\5Cp}GD'!i~_p2 ^w k6 0D6UvAHZYZY,MT]lgC]ɟF1]-j<8/ؠ!ys:)Tu }ND>_0A 0^'lo8 =QA-)OT}TIItN~ջ4cRd\ a.QLSr(!Ag=0pb(*@̇0dQC-q |X(-^0kCZ[,s{zd ljK#'3,}*0sk nxmp8Gѩ@JPofCT% ٙ|7J@tZRUϙ z:ב'R ! +V4W"NL)/˶^0mn8 3y:0=ahZ3tbAhE)3XF%;dƿiAxPI9$~~JȄú*MBʕ/ء!f;TU$xuىnU;A)p܍z G1n a[`98Qҿݗ&x_PxX|*t *v@*J|<̇/RgvX.3,hab{߰zC@8±zkxa5Vm)̓uC֝+E--$B#I(lv2UTd=A1@ɧ^kԢ|=qΈǝ*wȽ,;b7YnBJR-pCrm>so݁Fh٠fqƕl8gNU7 ,`['' '~ofJ#0]⡴4:)!y4&}Y(XkgCѐ &k`vѵSGEy)GE48QX% kr-5dxW'>s" C`Fq,a냁z"4w=KU3 7ነN6#@`%&Jj$"=5DhGBZ43?M6&Lah>L}e42tr9oz%%؉OfEP]9*]<0_Sx*^@fO|TTc  &?8 87k$C:}~>@R:yh{-}G bZOyY0>̊64*WA|(etS|g6q"u媽+QzNy)֦qG NbJ`@NP˸(Ȏyxjp(Vh^+QSǛq"JBJfp-teC =÷Xɪ< ?y5i ČT"=,R+9SX/7/ 7[VCtt.i2WZR0paL,Y,p+QSYl_w|>P_(`% /fw4x2=#Ik ݨ܊ˡozM-^8l o5wO)y{W۞7gK5Jrt'1{C)WY K"LGOPqh^N.5۳ pbY *b߭r^']BW{*cv(@o ^s5 .]J$w~NTHw;FT܅@[=?z| )2LLLLPE|i^Ϝ;t:wAhwڪ*ߨ:>#ӿJz Nc.3 \*j4r:xPR_K'(g,%LDn<.rl-ޅryX=ÚESÅ?l&NrgyOqjD8޵x_6"7@1f}WHV`,du-#m٢] JF%#%s Xw0bVU;ucs1j@DA1c.NwrvVi@mtB31k%uhJ` qm+# čgjY&6/Z+{.1JONv.U W.:W̌~ܲxV,d`/c&ϯqkJǃ ֊:ŦWշiW3C*~BzE)Ne tVL&n?'w2n|*_&m!*="yڃ51ST؊\[Bav`vI3l#"DPT5N Nk-0cL6TNk?\m b偮ڙPO(P<"b<=nӋKRsҷcs6i9?OŎ%WQɟO-C>5XVI.~8kj~EB$Rj+'W/Ev02G$Qk//e"^`Ě:ego21o3EC"r^V 85pqLLR $HyDF 9EL;-$b03)⩻WnO{"{խDv"j48hp/޽b@ *dcu4Ծ/g;}7zV\"/r!6-m§[pK}UT@614 3XCn&Ttn/3xiǣNMH_Uj kj[7:O2d7]~{Ot\:qi joA54lҎ Û/1 Ql4EVRmcp{quO}:&rPE%/BR}.B{p+ 2hGBOq$_g]Ŋ$U'4z2eNTqFpt9#B k s^*BǞڑ<\o)fK1`w^i`O陁}F3,@B&#;z*x/S0Q4O z:ݢ\ +y~q]c'ې@OЛ艮gz̓Sg;D:$C߭fIP1p0)7On@^+tPAhRk9ql w厤7kz!r ;HAn;Ԇ&`=&k?"vRK~5?+͆.ns?>q$%Vcu¹ 'b^m8oe^g\q54f2R]Whdg{oybq(Iak~" ůd-MP><m< FRw&zk_wrخ9ݐha :=EJ>+^ܷ\3JH)gKTR 59+fcXigr,C7fV~ TBZntEwd0wv"C]ek`#ru 75A2cؔr frY;~ rsIX>=1u|sHWŖ䓾ltqΦ6*0누0Ka6#\cwu3P W0쯇;[BtN wvRհ(g0Kl|?$X\8+^]~#[HP4JFc_\5X.|J@%]bwf)%  غ L}O^Ra' >Z(vڟx(dJ2*[03CneTH97>πZ+\!BĴovn]I" *Rp(0w5gD \Eb+M0QCU@\н7~mag6}]:ML8r-3wq8y뀽P-/mut%_Z>Ѫ~K!؊irn@~}r'sr-Nljw0eNIAEU!8mVGx+$j}6 R0ztCNnԡi4;ko*%`[8tj&ì첟Cb`NHڴ"]'gc|!ȊAm4J o1<R4?q ؽMk |7B[V.{ڎٮTͣKpA <6Q< *>q^C&3$u$0D018]&.XHoh$T|w-|` m(sϳ t]tdOύ <;` .E~>7-3 EeyK!uZg;eMcX'PȷP=tCvh?r![Xt$8&b*keks =Vf: :gs!Ekv@ʩv&L;r{k 8~T$}BDXو0da[-6niV$"h|V=[3zo-W@每iٷ;þ%Q8 T3:^)@hǑ'<1}T,2pnMw9]޸"ƕO4(\P7jK<{8]?*e"X#P9U+v K1`⦓%.Xp_Ѡ *g^pv„f0 fA%W<$GKEe;ȁe8\[ a*= ZA{5`  <p76><;ef2PW,D'Ik]|o0f&^>H$xǃ>=n-b#Dܥц9b8`90rp@~&ihXlo^a s}h35id]ۃnQmb7eu3sgN/I"db9l%/W*{o__@x@ҮL68ܩUҊ!tՃjBXE4:`Ym`4Y;kޮoW}pX/f|”怨F̗=2|yxw](dA>֞/!{E[ޚWټÑlequDƸȓV`oFM5/_"f}"YR{W;A^b GJ0/R=/a4]M@_@Vh1c-;1DZMo y6j yxl1W6'BZ1dM3QOsw;\zҩтGC+ -qM 24A\fx[%qv+u, IIl7Lw'-O@`;\g" ݳnX FntVޓylHQfɒsY}+tGp*Eӡ[E\_%qN욵]BL ܽ@"$*Fa{#VV LZh% +M>4T}_gR NMv+ בq"$,5pBkP.X*|}M a&ߍ`SǻI)UՕxKɰUo<\~}KH6jxxY_&i3렁0hiiF4%.J.&x/Rޱ&/FVod`]huo(# ϝ`@Ƀð 8$O41@-@f8m6o؆,=K$! b\Q}8A;+p^B,8"4}3~oO5Gmô`RJP$S^8@L'Sj} 8'eȠHW^^qOwLqeUsp̶mUD?ؒw.3QAͺ'*wX֭ OH'E p˖*%k?Ō[u<gz u`fpTv*M&wt""@(;깻o}񉞹Z| "] M?ҚY2p}h?@arqgUv  ќ Hܻp\-:CT@ e(T`RO~,ٔ w7jI.3˾YgP8C̝F0pXnf -:ySۋ}st.׉ZxDe('%xOɀ=zN#``&RZ-j0gZBy:W)]}&Hݟ&fJ~&n_A+48{+.'A^UrS˝W>[>s.$joPk\})9-S8PmC1j&VVFtM=9ìޞ;Py >҅pY"z0lfEYrtD*ZkkmL0&( dъi+U5Lõ[ۼ2/K>`YYhxA~NG{ޕ t @>&oiqyA5>Xս|Sxǯ:PWdpYԓlK .vڄrK'*|iJ ¦^'4_S( ?9ElV,jrm+n4P@Xt?T# /w(:8VϬr=KA LrS[\yk,Q<&t˘!) UVk,(srjCH48LG#7Q-I&ګyJ]:gZhwuNJ'YJ B;Οh/uȄB꣢kCv͂)-@,3m׹ &3C5a.-Iv'M h?C1e<@DQl^A.|j+KEXzj{eZ62Z;ܕY K QhIZivTw:^]~B?sgv< Dh.,mhwajH9ŮfC:Q+hD뎖vԼv,wLZU_ߞ埏Hw[.r@3:^q"iZ >^'$ ͅ#J&bODhh)wکj.i#wZt:KZͱ;uϯ1{s'};0d9 O#zJ2؊%B}+@@&Jj._ m0 h@^@LZ TvauJ-:P/S}q*[mP?lVGJ}+Fɮƻ0ϓ"GMѩ~llS{kP~zw]HKTsx{c94 a^K-alC]3QFs,t |x@< GV?, l%Aw,'L8ץe[`>-CWDՖv*X_*kHM쿄3#x"J%fe< M+l%=4&*b>~Q/'s#tZNscWp<3Wx׍@Yu<%x "~7,?y#jD1@)gX qc D1?hgmL7Ч3NW.G f7ΖČnϘv6on־MϠ \RAY~@Mt؉^j!jp:,y.L_p<poɖ3VDiA5p>xQ͇A~l ?Jy-4$z( mIe3ꦰ@NUI. (IT)#ƀb53㄀(DF.5ur$)& o&h.@d9,e",aNdqϢI ΥYL3EU˶.7~BȬ]_!#Z%je jtLS[0U2+Ӫŭ[\|l(>{@ MGxy`>Kb.d恳1z:"B˓O=v8 V_[ZNwa@k*}r9kaWhKaAo!:X._ѻ@2)7 ԕOèeAXVZM`I\F!x%ܬg῕ 5;Ѐ6MnuPe`d2#PņdۏD 7diHGQ&\0+਽耭(ԇBA!\9As&2u4s)"zqK7kT}jIPYѫҋmA[OFYL}KPG, Bxtkٓz f=lJ$0wmw̙ Yr`n%PJځF\c~q>95(F޼\r[{9'u0a7-hl@5+fX"`g7?$I*0@~ I^)|92Wg\w~FRsWcb@ )nmó]wk@Q){})z\A+5їJ+իBЉr0g\+9~ew k>[n~ rÕZ}Sϓљ\Vn""8Xz# %{Ь'n^*vv $;!T'ܾ鰦=m.+4qW!l΅4Ly^ $\٨{Uո*걼r-(?Wp% xdȘCJHw9Bؿ=FL-U|ؿU1+F7?]/tj-x|"pafIʲnaAX9ђY) ɏTmόXn:_ʳp1Y&6=d{+wJFP*(rD0nq3Dz Fр~k}XctiD):-[HIb`@Uc@N}Wi3[2vsURiݔ71OǗ[޻s< 4#RξIk!&I4qIMjSQ jht[;ɾ!L88YRv\2j_6/ Sy㱪R: TTbJH~^z@Րs tѺ3NfP4w9ufURH7K`2Ȣv~!Kr&w"w#+跰|&@킣%߻|f͑HK]tEU\Q N Y`c* 7jQI/S1E8`?]ixDDeT9&v9LiB" @ 4> VOսo?+b|iZB{+ NИXyX9XTw]=mJi-93upmTQ=Ǹ_A!IJ;~SRAl݃zH{ aD I}8{?<;mv6f$jJL8<=Μ`d@~Sqʆ@(Hf܌sqG$O3Oh{Ök W!%9#? pK` bS$o\1 ON~GPd"1,0r@Qguls 0b ypHyH@Pht(BNibx~{JN7yP7aAnlX- ,ZPױDG=\D>LbRt{WH/*~,m,BzAp>PuS0Ywbȉ*V sR$Lb1c;T_p#:+xo$TbB1r7`;ޓG!B8 5%2h>Ax͉ubr[  rlqu}D>;nۄ{jM =&e|8<W)7˜Y<Gr_\UCWjI oA4fI]t븼3C N;..{rgZ^5:[mS&t{~A9lO]~6Ut?K#rTf&\Fe\^lZ&lY^P|K&Fn5RmA>(*۴azDv ܾBIA ˡ .r ]A  On_ӍNo<xx Ћ]M fiv ߹׶Vѓe(X:i$OZ./Yh^!=c< leTS#B6E;Q6gtg4+`hYdR\HQG1M q[h 7ҍ+ 5Ł@>1u8&߁:iEE̟po2-IBN(+b0pН>H /SAіW|YNE/kVjx럕]3/ ʪRukWc˗0(*ڴG-l,x6h&Q%?M+QNmRDWE/ "3$0etw+.ٻ}vS)%d*ʬS7|)}bB2(#;B8#(ŨW1g-xv?(O_hWܹY/I9|պtԔN"Tp9/\TDTwG=*U?^ nkN̞CVw'A 4!47@O }g~glQXADTWb#ӰB|6 8/d8`3:ηt _bnsG60A޺JV~0nե܏|>(/Ufs$(00c kxs9cЙb%gEL bXFrOqfTㄧ!c 57/O{Ě@P@iBG( lE sZ G'!#Uhh&R$ 8rYtixG&|afuVe82]wwvV;8/o5Y:^vG:W,mގJ ~aILŮsO~&eRYԶ }hSVwOý':Xp[.i!Y!SK/q%=1>48*]V}>+G3 ߤL%WB-)h=nPW1.AY=8$URB?HZ{a#w1}D?]Z̄eM5qHp je7*,9Qc讟Qvj9 飴!erh=n'*֭^: z';߄5\/pDY"ڻy@@=Te'%/vQ@CnbڣsƋMq\w&i"[p7Ҷ"tmڕV@}R-^) HyPS! ]jouɻ׌Aj]/$$2 ᶍ;:?`Q3.p(Lrbsa5E' CnqWb'$UZczEo.<6鋩UdBn,Sq`O;q]!{!`.aıMo7,{9R ƱT8V#~yܟyc ׌#H1NwN̪#ب(a^e!SVM 1OM*OZxn׽NF6Ϊރ:MBqNkma _kn[7e[b4Zf+[9fM,9.Ee\d3?`ԠK_ o'8ŧæll4 43[= B݌47k@5\F%B~J?zSs٠4Tm =XWOkQCt3 De!LjM:W{(DŽP[@2%94 Fi"u;!ύd61{)?S [`;n(FJ^LXsr0gl%Ma;X[~iR""0 8B:^Ǫ<(F`赈X%EBoX!σTE0FM<P9Zbx2;N/pOL[&"Jl0"ꈪUs>;UH¦K_ AhG^곱Li>܃MY%8Xv;Α#E`}cm?M.C֙SHIٖgz0f66UY(>k,\+i\\ u3:nS(IA|` VdI 2W*AQx %vy*nl_s雐'D*_R4TҼMeŖ#]NX8s ;6ehPQ5}GIJhyIlN v!*h([ӗ/:g?t%h*>>̀h.A'mHKc@#<ؽS1MB44.ľ CJxj81|y'\Gd%) ľA5xez򤊤œmuAE@lavAU!S: Uk1?wSw_5Vmڡ-~i C}nЯH47Th`D9-> OiF$yFdI~p;^N1xa y}H!lI2|t/Ci(j+k[ `.ckz3ozOXYrMǾ( ~`khaQWaYO_vAIj KR|B_(`SCg>E$ ss"թ$H`cJ` 2 q{\༁3wӴ 1X|0w$mIc$Cipza=E-D8qW8e9-Fx`2Zm󴀿/\5̠`*1*M§j 7Cdz-9K\|Y!|X=Uqxnc-N1e_ل~giI TvL+? aL Zy M>wphj |H*l; (v؇ᶙ]xM~*/]b\3(C+zC 2ΧoL($Hu5 \c,~[!IZ>,tzog\gNMyIaځ1@~c.џ _"!5u}wVk[]Kr}U$n:"jT64"UqCmU?&L(!>k?$bzeaxʣY UwG 607ƹF&G8(Lܿ`*!m}cHR3 Vy8i̎1ǜ7 ~6ʀ3% LY`%D"Ěf zZI3.(j>dnĐA9ySReDMx~njr JIk q[,BWha18'nL=H0cUkBVe9ŏi̳[Y_#ȸKDR"bLe}dxob Ocl| 5߂ZF @ blmcمş+336'm )T yhOŋs[_PvP'i5&T5 @)]C\7ޖ{*+6+hC+r^}>$-MS2~n(_iޱ;6U/Xܗƕ-=J)u"W twД̐fGwMf3mvpO[]m%=ybW"ʹb%3B d/5%kfJJguF:~Cu@p.ѡ]  B(\$^% [I; -pC[3 36-@1 c8Av_Ԉ1UDC dҨD! k m9 ١SӶidTO&`uf8"L|*~HJMl ˘V6wa1 a{;z#Ru>5kЦiWQpvh ǧHݾEKxgmx#?gh$ӼF} 9q@q>d@`=/l'ZK Kc%/jIu̅^m,lM(5QYï'mzptvoEjOqŠ&|G6H;(F>*!Xly~ W+`Dw 5~ TS r]GIqO%ḟKgMWTAJۙ/u=re?XI)܈߄ce5rެV/֫- h?$M0QnsG:u;Cv$tBI_x08!׀=z @:4=G( -sl H.#KD_}'!1#_hKՄy?z6 YyIŽB>2e4FLHPz 9e89܎3K2΅Df`fRϛm=*[tk$]w0#N~uՄVf)kIc721ZՎ4K6=/fʙg..~bWE).N7~3 %Q֓v 7]ܽh”Jb/peAœ-dJ8z26pf>@!fz,? 6|8_k$DaͦcDc׸t/$Ïc/;e6m\Һ3PCN&p.𺉋fg|nEryaTQǾA *b X-`V! Ba+?Ak21.k"𿩻7FJ4 U-@4q RcеUSķ0C>"R1e{9^Jw굓"QT,4SĸdT<&p1~idW: 6݌0}Ts9t]_83twxډY'V(ozL2"^ȝX!ٓdD-@p߸ AIgSQ(b^`6;2aq:V VܭfkxC0GzjL @ g(ZZ2u2.D ' |@̕+"|q)Ȋ9dߢ$]슅gu' .nlCYB^woݧo;9J_@*y3 C402^E@9+u 10#ؖm(0n0dCswNAZ{X|c[:*? o,%hmzNzy)1 +j݉GhhBEaZQ:dɐ5٩ys5,.~ (Y;>I2i}#%#,τfі*.mVZAa ͋zr8BF$8Ofl=J=ю3<ɘ?`efֆhD[P[<CuT `Pi¾H%pW {= T/KB\ԦT_M@h$0Xzc(r![:v,%s{|HHO.?8'!q,Cмjyx6/B<~ )$*Qm]*_ OQH/ey#IudI[AtVlP,.#Azn\8xRjFFH |U*äϯNQf2>v^wnA3Nq8u*\}F]z礮>b(@N`V j.1kӟ-܌Eu5<aDU U]$_cdC-py{㰒Î8bdъ*{99ULpoQ[.+Y}bz Igm 싫%{)U_)8;ph`cLKAeuHnO΃ zGr9S"1C۫"^gJ> 6b~|e$ސĭa~Ά)Cdpmkg 7ǒc&0;/Wo+*E80xxd72X8jQGg_ vqvH -C09lD*!߳hƹJ(Ss)Rb?1 ;fs-*\L 9,rvaMyo;g9F2̄ $wr2*>Օx.0ݏ`VFZGGUo:yU=WYN݇Љ0Nۺ)ԕC|~ 7 PU9x$1QDZNC8 TwA4;|wYx~ DdS!lWqa FRʯ$y쌚)}Eg WaGӆ^;݂ $= cj¸ik""{C,# D+wP͐2őAw}$=rt6Oōئ,(9uTAO)bCJ%6+JG?N{a/uk[q3tX)2k}6/o c\WAdV)~b]r5s/7Nηl^m)\x6}R5Qzsp;)w0 Ь )ws xE@=4B@l?u*;c VC2i4=C~[ҥ\j÷E wJ#rujwC[iTL-Ӆ:4 !{7aoHwH x:T_2"9C*+l&eZ=8J8=4`WJn ,Q LuEKaSPf=puoGvokj9-fC].;# S{% $OLMAO 3FWl+>."[Q%4{6#a6&txـ 0ށ&6@鞛6&w0I88 ir;l/=:F: XRȥ c)L~-JcwO%j>Ǻټ j8S}7qa!::^Xb=fh$#XpX)M%&I:.>7^- d:xC+˙]G|+<Kht h\V-Qh4]27Vqw١M-r^ IBBGv= r|*ZVvj]/™4xOaiء뀀E 'K{`~B "F9E(XyZjbte<u˦߼,ݪ \ɒ&SAR- JyCYBή?i^Ƙd( 'DmHw )ѭ|ȱ l *]6[8o.pqN!]R0`܌ɪFs+Gs"6i/$/3Ӂ\2L @T&+L6jdx(@\7 %51t\:Äֶ{ $ϗ*XzRGkV^3?#תnC?2, @\Zړ-Yx|hQڲ"zǎHg`yygѓYz(uaxU |ǔ>bTٖN(ZYZߧG=Aw |ۆܐ \dFܧ M{ޫg>A#T(OR 2 Ƞa۹ l;K4h!,p#YFbG+b(Z; 7lL"݂t9}k{*>*?@qEg XPyϱ'Q#\Y UCL?@dSMyTԁR:){$t&5(Q;uę)4{t*), mU[t|s(w£e:c՝^Ʌ|q31[$F'H1H[NLwՉ\;b ]6d*{@tTx\|f|Q5c4 Y1}ve˗g L;9HlGT` |*PP5}zo:JudT/uObE<3v ɄLS7hd d@Pt!$dEo0XΟ2!H2&dLVXfD6E0E4dSfJ c>&c E0K,^"w2B d́` d@W@#̀,ך TLsLWE똜112@":" Rp6ζws"-; b<X!0`.]H0_RWMgC`Sbd02a@,d:fG5D=I``{qROT9CO#  ^ aFB ښ 8d!47\L]̣ X̘2:A՘!=#~dX"ՙw Ȇcwhd@ ɸ~ 9`(0Bd@Ẅ4FPFDE a2## 0 "3"d3Fd_L.07h uf0d2Bp|b "!02"DVS-<zd3iL+AgL3A Bdɐ52I Y(fD@PЀ #2"3 fDA D@# R 02 e Tn-,~#RHQΧc!O>ؔ׸)ݚq.uMF竚9N19R2%z|ch'hP "+s.{/DRxLFFGYjj(I1&(1vB#d""2I 3#0fPFFa$" 4BOgkdoZܫ832j`XL d@k0@`  Jacf}6DR4V]^òb'ak@&bE}cE0#&{]dM|ɄO/}~y,[jVqsaU[jQVy y ZSX'6 ;Z^Ӂ =gI8'(t^G/G:sKR^ :W%>kNMdZsa!x)cȌN}WMz'VJ;fHرYAˑ<_E?qNe%[ϳr3zFNRU\sװF#Xjֆ `C<f7i^W"]{QZB]:ަ}Uh^=zy_#:]=g(| Z:{=<.C ;^c-~ ۓ&i`<0TW_U;w\s:kNB)eĢ`a/"~˿.)tR!8ÅU!M_>(t9m$T^_mc-3/K-~dXnMY#Qo7JzN)Fk\_0JԼd=rŶ#-";(Ԯ1e  0WȱjoOA[5_.+8@i56jv,E_NW,ݧy* ߀ Rb+W`VLPJzӺˇX矽z5 Ms/M<Ĕ\% Wߟu]cJxASB6 bdZ):n;Gͫ@6׃crNxvZMU)dTP&"}LY숨H6@I aN`w*'l$ cOic~T8 "y'7ؖC丛uwObߕ@0%^uN]}.a " gR9d}S0FHN$@^¦%.%QSC@ H=!)jIlK\;yPb*Djظ}I@IװGq, Q._︨jP_͌ˌxʰC é MK {i 5jDFx1wYg77盀 OZy,gӁ'cKHQ/@'%qb=| eqqUd C/1h)Fc Ps^2k\[?X[[҃%!zϏit5}vb6O|Qam0[7.z[7kD2gwZ T@OoJl)-$?-hIoP*3|"S" qfYn{/aO/;& ACr2 ؙ5 8#FTj?ZOzގGruAn׆s<0$$@"wtU V)LRU<`6ٻlBrL"r_+G7]S Quhr Ό_8^.-Њ!$#y+fAӡxyҍj:T3C3wGh;5ʙ$i:jS_AN9b':#M]j27Bi2 &.s]38h #U6 veg{iq%a3a"X?Oaw^rvqV؝,7|(ob%n=o^V}y#/YN:w>BSw}𲖲Qm{Gܥ^4 @ڊcnFLϵ <6 i@ vPA]bs J ")'(xgZ(#Sa^=:=*i빼:Рy!c+͘ v?nm͈Ά ',|F޿shɘ@$ I`Og:&@ n#@@Z_)2@MpF xp9Ayj&o B ML3 R}ac 1C$vP/5 4:8OȯY3ox/~?YÇ[Ѹ8^[+ }:p΢ sICٟTiO+AW\g;Νj߹@ݣ@3$tQlS#ҥ-m0w ~a.wKYNLA~'UJ="fʋ|}:k:S&WzE51an17BL/c PLg4k_v$%GAw0i€ PL ",?1c ƐCor9ܰM~ܻzj}rewVpxCq1g-x'ߣ@'\:?}ښs -w:=6B&Gm "4 ZoRtm ;ֈ^}\RehLVܮbV@C` 5=Oں9ep,h]hLߏ< J8".B{W&.  5#? @`#,5l@bXE"97{ñN~>>nh ;w,ۺ Em m1G4kY$Hi \upʍ i]H͕ϷAF=\SB{}5(iv2qѓųZ[ݻ7 Qջ5YTfWds1WC'%ojW ?. ]L:N9mS&KzvY#T =%Och%0h wQ} }O`O8OXP*HPPѝ[{jjM'/ڥƟ'wĊQ[~Psћx@i *r7iw&/$u? #@H"0Jug2s*".D*vhA%  ut&kcA%tm@Gf]c/uЗP Y''Ci [u* Xvc3 2V"[(aCI|f+rF1+rL j{S1: D m1w eWz)E)3՜gxl'ײ:ȪaH-Ci4-?9ЬԒX@YȊRrDgZā@֙%A/= G1g-{'鋖-J_j_Tb1J>ƿҞZL1]j'Oʭsݷų}S@{Ҧvpsv@tq;W2G=A9SA7TʢX @FCVxkN:3 /YaΑ> vR쟗ԺHsowDቸßOdo$T= @-XE@A C!i%,&"O a/`,8{9W+zoSC= ET`8G$srx?cpyFgUuͤZC=o.Yǣ~9A}s6G9s~;[WAUKADlޡX d[+gN{/# |>X<=VI,Yoz0w' ~9faA3 lѬK0D>pǏb @Kv /w<#Ќe^ u0bz66@Nw~Cc澶SzDcުmCVo;_adS?ۦ54%,z"1UT @.*@A$":ȫl1|DˀFFG%*ʲ̴nWN8 :xd v"Se=!TmW)5p`}XcŸ9]Ycv!Q[(] & K{8c"g @& 0v ;dgȀ/{:ؐ$pڭ0d0#-B Jq/`_"h"@9F_h cQ_,) E!jVbyB#V"]Ax\qIl8e'+1X? d 62R1RbhPw^֚::KTb6v/Z1;eW_s+;̨`&Pnn1[8B=6tWub_dTx< ʊx<'OY{%ayf)qm)t =U yZ% ͑vŭf[k>fL߱2"os`fzcם/|~ѐ2Ro'SD)b#.#0v<o?N fOmƎ02_9o洕B~=uW.]q^&RaTH4Nbtr@J$6cҾHO_*u#B^al qPOL+uŊ$Rd;]_N- \ 6΂+Q= ;UQt ~N)&HbsYqYALSC^ )qEBc5` aC0Ew9 zN|!@K~!c"FQiNLǕN;u nlχ1+=NcqP= btg(buNf)w %p79m]7OP]Ae!G4',$6^l@UtnD(0y2VvؽuC`I>j4\0 E`,f u(^^] 8vB=.Mʂkc.ufȝ\q*0sRZOl!G[LLzP~xvP2j񹮆adIbg{ٜ*DȶCP~7U GF dK9wF"$+r}Y]2m|#mqF7"1A`nbꈯ억$FڑSC.ewN=_1;ۤ06\k[Fow#`wOuȪ ٿ,P\|rN\HFosyz@$v`FY~ WH'f׏)OaG2;mh#"PvAI'MѨ m>dӥ*s*V e8?5.}zu\C1V鴔S4՛s\ :x~/& In^-7V0O}\꧌dD=7 Сce&o pQ#2j3m6',f4'S@o;g&Z59qsW^~(wrZ/OW[<]s[U~sPP*e` 5mrn/ICҙPk'%rjiG z16^h&ÖGcfxsaA##?ƴVEl1e^Lcqtڬ BE?iu<)Ӊ6!kI +tu3qt׈kNhR eP=p0(0(|̔ly= yCQe?I>0EW5Ts*5]yR@ XW.5 6;FG"{cҿ`~\(%^u[ݚ.ވ !zpO.F`0vSAo_ jK1To73pNw`j[6-Ȓ|sBKwu;;I\UPQs8x X MZ3ZF.2}1v\op~ZhU0#0e xMGgx`B$1 n@ Dm;([&I=UI9M33ʞ+3ĀdתxC`]M+InoC&Av bMO_ M6"IQYnW[f ^"WM˃ܕD%y*~%Caq c Ƴ:+Q' (@y{@)IH1EDŽlnw.l峉g&q|7߈jL,{5g4tCyQ a쓔.OeSaWJ (>}QwD; ~:piBi- A"vbA'Z3wz"F~h*wqމĊm?<!2ٽ/E[HBhTL taCa>OWXN__t9 9BZv% ވ"V_1~]~Ql-!{]b 0#!11%3jjZH9W%&M'ŪO7E(k,J`BL|C)&?ku Q`˓Gc+jW%õef]!mIвfQIGzM5R_-%w$ĹɋkEsQ&g ގwlr0W_=mn|U1\ L@v 8jf%o8+[Ec:*̬yWj8:w#^wТ'nSQfG6zV]Q K=]Qz[@J\ B-%Ļ?(;;-^|M*A%4*幠`yźΙXڶ LA |42<%PĚnL) 8BIfYTr9}H4LA,f!]Q"s1FЖ*0-YT^Vz{ 4^DW.X܂K~7ݕi1m3.qE]ֻodC B @s#;,r;7ʙzuP0B7 5tcC ې(0$|k81F.}, op{L!\;Ɏ^ !))D9Ť۳.({uz7'G.* ` ޕz: cf`zGX{ p/A%#]種@3>oy%ׅmp,TO_q1c5bN=Ѧ[.xiYS [jS/cK!L1y_+Q!LJ%=\*0Y#dt-2W-fo$d$=[Ɩ1uY-Q宍zzeO]t;I>Ѝw}rJ2f]*AHp ͍0̀&k!"u0N7js2 0`- O >I2НLjaoi=kqjCij8cͤi3I u ZbP8kZڂi$7'9|;MW-Y.rO $"]0i6r nvעrb[-vIÌh4ce5K*W:c)iOB|h8!oί O6z>U?b=Ŀ R[odbY2]uF,uOIPzX<j$Cvdn2^` Eb=SW ?6Ins|XnOiUZА[7MPY]AnFd T@wbڄ0P ϖ!!B͸`Sߘ)10mJL\98U4y7 .0GZ0H$#8LoMw [ P t7:v `1H|u,Jol{|4Tq_="WMgGAOّ@$1 GtŸ.jY!@x08|"uj[N_%+ 8rXd@-q"2C״# kk ZKfkuN7cе\j>2Ȏ̿aG ODk=_Hxγ*4ʔI""lӴߋGoiͭ1+RR2ZMJhk(zI{^U\}Ty9-f>BK#zhAfPo_H6+p#Ɔ& n@yKhrdm$/@91!NSbnS]]UC1|KDEN8-x&9>HEHZ 1 (2]\ko遊5iU!5*dyS -4y]&$c]^L4ҹ'Xz]GFg~-9͵.V:e7(d kg ?q5Оkwȅz- \U>^%>-&X.r;X>HCJt0Y`Ufm8bHޚLżR\?od>AM'j'J8xNi(Q>(/yrvo=IX:)T5ً, @* ; /՝ѫn҂)w.ah&Pp RL }r&-qE t-x@qF l uP]Y/k;OL,ha~/>k'# mpky,Pk ?DG[Z>ɸ\vpbC yE&[R$k 1(]>.EMfԻ#T6m50_}9dAj5jLy= eztVb#ai'Ү$+iF~^ VDz^DmINzG~-Uvk30 P E NC>Ll955v{6ש佻? ]xbŒm}nȎ# s4^J)@@O(TK% E,YP~(/$܏/hԌ!>̅TJF T'>7X+~$b -NWB Z|ahH}znjT]!@d+:!׎ 2Ѐz *m-eX'!lt#E領u5Nˇxŧ!:8K!Eވ=R6uKO1"gҕr8e eaT: . 6%axbtr+l|$38?V,j:h ,Zׄ" j[3-v빷piRwV!WzzGDƽU~_#e`]b2aB z)l%g2sa3Bx8K&8CF%r*o&ژe'WvB%:JQ y"` #FZ7kNdp$ 0vzػ#d `wt^x{$G8Jd !jzE@rFjÌ>hSel_|Z,9TM/ :I@/.r_bh:$02+̵7#X횕BALI%y2𒧻a]YN , juJVծ= )Eҥ\˱Ή* hnf>Ԯz!zȱ4_4qʸ4 8A8I.[)lD]KŁJFDtBܽo2U!HkY{ŀ}I$Y4A X"oY&l4úۇwFz*q'A z \D j_hxQ5= ߦ o\Uь\7a"i4{}p4yV}ֆޚ= TTLt&iwDtY婏2o͞qs.Sƅ_OWްbн+P|x7,M:6y6R߬LNcP/9+^[KXy Yhnf/JsZAyRXP @.5FqY7=GpZsnnUojanHw/iPSs= ߹E)=K%擊D@g]Nt{@Y(oTmRd>,x]!6db@&~JA@Blq*~?"5 g5 80xL]7gb"ݰGaC{_yN.zxiւύʑ| D{hfFA;mɞ+|'?1~|ƥS-`1GR)'˭rG| 2`8w\OܐM/]R" 㚞BԔ$U0{!;腁 @nD\=IM2'.L½9BC|[1hl Fp98xWe ,8{9>MV'h_\OɘB.P "xUA]jcb J5 {3مoN&r4hb~]`&N><a& )%Oݿ! oVH >B砧~GǨ)H|mZ=y4J?,"f)ӲG;?!>!UFqU=j+9jl9퀀#>l0_3F)vb!_f|( c* UhHB'A ZD,R%#=Y5Rr $~{hԉS_)Ayir#M 5tkpaCshu'U~ /, ;_jn?FUc9)s|IЭn:8`@/m2v P@7̅ Ȱ(0d@;2~Ph@rOڠ9RZHfFGKn` o&}< 9 . -|(gZB0:g.?gufrLnE1aO]N巿ccumMk{K@C1sH.m2T°<+"Fo{'>Ite0T> ?K" =KxA 8 '^Gݏ--,g5=@"yv5Ð"`Rf!Ԅ"8d>b8Q}-r71_$,b]n-J3T$ 2ʷvьV  ^-DO777J"XwxLjC5P۰O=̠2!E!B/C.[o/$\c&WEAe3֬?i}Y)+nwȁdqFݥwaG J/W|cٷ݃{۩ӷ"DI֩9n)=ήY!ֆ p&V0E0W:T$&*Q 53{!Jꏅ\50piZ^$\tdzy>D3>w"|UvdRb/'|,US݋D^f|Iv5!gyrPoĶ$;|Ռ`oS6gV6G?m ^¼:CkJSUtQ5r}kS[2ۛP@ wK鐉%` ͛JIDp58־' pKVCEY_5zDAhYH጖YUؼ4Rgg|w _I&{jQ>t\=}n2:B/Ԡ+,oCLK9aK*sFv-@sZf 8Tenq3dSH4 :+iQϱԎ_/b <^r$M~^[Os)0rt7D1c8JG !l*h8V?~dD8nQmeqZ5ugV->8960M&ZF4*";r;Co4(29D^>NlٌdE56_@"#\[<~8PrN6WG{F,XN?Ior,gx=J7)!;{85a~4'1oح)b}VFF󐣳FWJ!~',DE]]{Hb Ϛ[׊}UKSu88CzF .W6(>-{a$n43pnj IA-fM(,.W{NwxuJ J@Y76C@8Q!vo͈}p2e`'yYi5 Ԏ/~}H=g 2}Pߧ{_%\YCsX[@y>z}h2Ji ݂'?T E~;uͽ!m6F}Mʵv'T"1r)>";YBL7Rx^OfҠtl/Ҽ&c\D ;s ɉ?We"b\J C^!m{Ϥ  ,p2PEXhzF"Äζ 0l6~vF [B6>}⫏R )hEV,d7մ2=I͂~`|MX:c450>eLuL :}@riLoU5F3ld3ܤ^ >|?tQ.R-#~4bn9O @ *4M1ӥbZ&zDIVfבT|@Z0y[7sJ,~G>̅GLEjEHcWuֳnԓ8++"R#8h#[ 7[8.dw]`b7u)R!Le/1[,Ax^HFYOc熶;8h qPS rOoW?hzUOP0s$VŸ^|GdȵY'is'=D4T-qw,vo)"=9\+B 8tk| +Ϳwе{]$_ϼ#4UŜV9]Yxf?f\P;KCp&`뎄"Ta*iE.QvSjZeǁ$pH`扝F%IՂx( Y,@1ʗ~?73&3/UEPž-#aMzzcS+FqR:78LBpo Q5f}k@v~'_u  !j8RDA'K΋")`7|3)\"]U6[# tPK`<>YeoܜCcEaqgY3f׃ o Y1K[8M@]6D\^8.l|:"Z0p >S;1X U& /h3u dݒ2g9GU+l ]M` 8/9G` \ݓrj ?+;{uUS1z%v}+hqfDO4(}*Oe[ô [6/dW0\\R7c^Os<;yb ;:42*,?Rj'705QN[,\hNc- m0stQ ku)Zk⃇ ':^XluLpj{cMs^N@yڤ'x)-7 2;${6yvw-l[x_ #04LY6.UC:RU^R4vN S%n-6 ?؟_̶Urg'cY۫YDvKUiWcǖ {URNYj:) lo\RH-?D3Tl D#0eǤ;bȌ̢G1*T ̿:0?'^xZSsF0Kzph&|.-ꤡӓऩPgnM=cvDvaBsT-䠽5ɥ(6U v1Vdئ5TϚ+Yy[MK dox \=iUPxSPIEo=e~SujGv)ȥLQ{tbN19oYT͙Nl)+ Ża,'wYڮ32W0˘!_/Wg@m@rD PY܂ ws>orY[~ ۂ~Y##8Qq{M@ʷȱ_gH,3T)0Gdg/ڄ60C%Wz1RYl*nOskVeLG2_ڍq,4Ut+曆\SVXV(FTS\ * |Y(ij@}6tPsk¤ LRªvD{(^UxرYqgٗDdˢO,gC&n .PZnE[A%7Hx-@p9- nͷ'Ƴ|,5$^;̕]Easni5U+p=T5]~ƏGUAə;VFo^N_=h{rUif(2C 44P~v(6T?#<;;I/Aܽh"D}, e`^4Hz JTS `P%@]sLm` o 3d.*10:WON"NߕCjuVs.`j+ 2f? *JrXãYI|2:@;ZJa;1ڇZoseNa]fLIl9j"*bjdԃ=k_u X ÄvO?`u>mza U7cAru 31>c+82 9?߯b%uo0=TY n ֛4Kq^Wja͉[r%+wyY]ǂ'Rϲoސ?ߔ~?mSs &3v39LY[lS}gzL>Tm>h=|'K^xc"pȁo~%#8V9rIue7xzq!>K+kXeqoOCب ޸hz"߿fR br:qtJ^L?.L8L;  y4߯f@`jyF =#Z'+$KiZʳ6ɥ}~olS_qFAwn҃r]o40ooV+UE؂;/2_<,T\<L/_1Y'Qv*EYuJT;{ڴZ 7Qmu!} t> ( Z/ F$(vV{$:,5Un~%uS}[3yo,f{u:As˽xLɍwx-wuO5U&[iTW=t 7@D04rZA>F'1dGyEZw"ؕ>P+*27_g&$0nR~퉁xd%WCG*ʈUmlf9Ve]룫= i  WpX?$y^(# ZKcvlsU۵yg›g[fp5-݅^ɻ*:OӵlsOwWFfIkZR,g;Q$_ |]2gxAe c/j* 5*g3׾rZ6u{5aKei'_>[ߙ'ӭ }=F# Hd,-f6g-o*dPOg?Hq-lQuWw-ˉCC۟G{Z=k%'>d-ω}ExJ;PS^2D%խ}r~h^˙ .!)&Ú jJ8"O_[J@Hٕb %c{s(,.m/GTɗ_wm%@GbsH8&s&-ːQbԭK>f)Gxtliݵ#.fE1/E4I'0epK,0 <1%tH+J]>f%kiKY~=7ć9$F'FUUWґ,zSq'237~FyH6D|q~7[ӺBhXL7S&ٛeFDb/՚5s n W{ ZFϘed{XJY"SlɛpjьR-|q,xLȫz=޷󭁸R|ieݕMn+6tpaQ儅f8 1o܈>J2Jȶ7TnW ۼY6*n=IhRQBLU>Z々GLnmՒzTKё4΂e_@vd{׵<{;}usp$B?}ajBBըӟahӇgx[N@VU@@R^jA}<4SLyb.B]<' S5nP'\x%A#W0 36M/RhK,'vuZ}E PA[TՇ[ͱ; \/|Ǘy3c Oi R! W1R!\Yo?e}x[/AD =Fga)wٸ*pBݷn#4x 5B }J~l4 QmnGY{(4C0[u]=|24clV%BOL" mF{?dܿ]z6S&q^M7}{0!Fv*aVD]n/Y_jq28=y݌jZ M0(iQcUM5uCsqUEa֕eg$~!vɄvL' ?>Зqh#d'[ -܃P q؊H+1Zې_53eGqHKNil捰ҁRUs lE()PP+["99]F4''9zFw7o/cOsh쏶S/W5۪qG8H-EF!!óuMaǂʃsh^!\OِR;^LY *"(d,NtYYҼ%V˦xz6#={YRTt}*&ڣWg=l:$$K ,S["ܽ \0BjYVX\/?HJ|Hm%Aeb񥅒%i9qߙq :EBPtuj@xnkqhIqijY6̮ȀlIdl#fkM'3A}]NqOU>xSJ#*>@= /y;k5ĉz19A9̜ʪvvYMAlE5;l"ͽT~ېQ{[p"~9 BVmC|oP+G/ (_jj"T@l5֛HdWiNdDk0#щ݃[[ gju9-H]9vΩFTׯ{M/jcݴd4}:a|0aQdㆎiչmv*c ^4Bj1kB30F bZ@TB~ϲ()[#MRqX>>ͺ-aCȩ@L H{ߢe3fډ;b\`;? 9K0uGܦ"mXs$NX (b=m! l]d@|}o7 vR7K3cYJ^ I D"1>AZuv?#EcJXzG3^PƥbY߈CT&@At+BqQ}@Z;F,7?^1n!8 fdb6dacbgØw%[vr`YZ].FC,]hhl/Qh&P+tz:`@r]%m?h4D9顏\mgҥ}3wv2[OMe))H~W.WY>'D%^UkF|aWk,g fוP#"ʩQHV `8|4d/KL AG1ӿC/% t7kX1|̓mCX#aHr`vN /W[Sbly[3D!3 L%)$%磕L5r.Z=p(} E`{aadǾ's(hČ%ѩ=/oOxW_M"=<"nUտgy5nn{L~[=EFbG`Pd[DgJ3T1S ' l6Wc!Sۺn7͵A*ݱtzx6 t95Ci #ѳYIkT.@nK5ʰ 0%&-NՆ"90UIh!Z*<evCWȰCA4\hf@[{r54~ɖ"GO08y0IjK-S' ^{O8Y9د4t[7V3Xb? Z3Sj2 1\ō!Et3J*#,ŨS*"wm57T*\ȯ(:(@V èߓ`C`zbrɃ!wcL)^gMQ#4|%` 0P+6D y G;Lh_OwVnahbػű#vսnPL`ӑ@řPQR :9r ͓0R )9SYJP;u˜c9Q[/)* 9l#^6q)' (;e|h}x0T+RP_V1P2k{ ք?WA7ҋrsS-'C:]Ye}Z{.VȬe6s{\j{3* ~LLwt#&VɈ\ܰ=}6Z0''E ^]K;c)-g㈝DsM.Ko@12Pf?'\`9XrWnf<dZL/^wOαiHbP[9Pa^2uEA6[YJ ՓR@eb/E ~t)EG [o\8{WCa%chg֥ q8Y?`j@;̥>3IU |~R.颃m: ,B|ApHeYn")ӯL޴K'pMp(UdXۣ촅M~F~?<0ЂJ!8Ӻwl]EeKKTPֳf'?OdSBw?ܽAo2p1'vc窈yؽ&6MF +`u9¯m ps3f" ӆDGRT9a1ä8Zc(: X΁OPj%|&~E*+C>1Y[wʽ_zY!cي.#Dtv?/ˊz{/UzP j=W2NZx1]Z@ L 3񀙞Y vZzC!Pv3$Ihީ. .v yt-8WYHb!՟1lUV䵫^\;lWwSyt!VV:(@/ȿ}frhok_Vcf U*yOFr_Gb48u x|"]T~/ɨiS0<橺XM{r x:{N4=cXkoJew^fv]IG- 9? PkgEha(  Ps LD WO$qE^d9! Qho*;ks 6_Z(-Fc2flb8Wu.7@Ph,GX@>c *%+*%cUO?IjMٽcLTwq5͘r=ݺuTQpH3X[BzUrj֣DY_a1I9ڸӳ _l~=7}Y) b 9V{^M7&PF7]v)40&-w`,A](+x):5cf;?ƴ!FT>tc[tX<7 8+;փf]yFTInSLiف=.7qp&>ΊAdq[-`×ݝ_u7CAܡxX<~'JB?g-zh^ l'a1}_suw Ab(cSx綋 T_{C0 I_jA ^VWKXEVdT av60~k+쐛)$Mhiavg(U|tRɅe(PZ= *4%ba@IG aB:Z جLA$:%:5>י{gKɵn~VOD.Bp/*P]t'H)g l:vXz / '|:9y9@ "Hh0[CGUq(evD\8>|%%ќѥ,HC~]YwFLܗ{HfEx}~U5uJt]K490xn?hbږ]KΎRpf;S:GQzm?o.-n;U{% (&YxC2(e >;yDDhyfa`Vݻ"\NrxQ-"єaUG+O EPMf?&4cp: oY_PTכdXf_`ͻipEoʧ-zL{0P>: AMQ0@5)dQ$ѱ*ig!QT,"-#Tb9)sO?fm /!|S} #j$vW ιm(Pa3pl+ mWh%6>TLxSsK$݀}( Xb=4]sջO~\.6^ٻ 7" @0ٔ @|}}CF `Ffh&QGɓ*+~rEYT1GE0&Đ|4'wb3Cwr?g=HUsu uƙiDiDm(N_R<4  3sz1&xK 9̎+Nǯ~fBtJ4S#2LՇ4 ߦc^$hъ!)ԥ"Y0ЧJ4+BguߖySǩ{WTts2/@k }$ELW"ht Ӓuqo1kuug{7dx ÅS:%_A4@z̮pqFMqwBAU]b85(oXTIb^Ú ',J9 SOy,3N{O+&K\\B$\u}b0 $>#$rKC`""peq "3#Hrv'/cs t; ɡ}lێQ^<“z2/&|F"=Qg7ab)ʆV{@4AQ,Kޜܰ%  Iv$\Oh+BW @,Ae~sY)ȅ.5?2vօU'/Z{M٭ 1ZlNqY/,! $ob>LŇ򾃉ln3Et ->B"Ճ 3nL=D0udt  /GKVPc*\Ӄ/$tPlH=X .J m4H҈ dfi*7U GΛ?K˰5YvN(v<۵_pG𱷅 %/n+pıJ7Vx = ~HMG!J@(ŊSK.t ۶aq7H.ip5)!14#Q/*uz5~Ņc@n\s&,QjWBb`^cGxr ?Q6yN2m@p@<Ѡ̌A>ȘNI>F1m-rVn{J wH 3ơJ]<1D~ de~ԹUZ_.x^8iP e`ܑ0N k@ؾ,K dc@910 LLQG{O(8osXfmE(|Ÿ ka I+qlNSBYDIS rLKL2m9+r<@S8eo9n- ;Pn<3̄aȟC$=ZfB B_7 TqLLވ ^ovD WaȢ'|`rTSdžN1*A30:0;QէqR'q&JE޻`V+ *]/ "9us,dP +gxcdH=sυ}bȳx ^dQ0Vv& !~5<V\)T|PGg]CTk Bïy @ j/9OtO[%H@ 0%^5HiN~k/Ğ}{h4CN[gÿoGcѦ=ÃP0 щ^61o:fC8KC^W>RyE4>u+6񭖿(xNm<<*isP"td[UcQtS.)]äҳZȣ Pm u)O b!c,Y*/XZt LEx)Wt\Hxglor%P|]OS`]Hz{K:!'jLR&ɲY5<{!S?`5]e *<<:7}ȕ.B. ,[!BUC^\`#ENڊo@$D1n OuٱP/'N+W7_#:~ƮF{ ]AW~K2f~ƨ|/5>~Xm5ԙf9oK5n*wT,/aJGT6?T}bn{!Y &B}ssdf8 ܚhFV4/ ex便Yv?xC]U`CSJL^zťBd2z Q0K|PXrXbB#wda?I%x}e6SK9^I_=A R&Zyd"@{P1R8=k40>0Vweoц#>]<9q=2~KrAB` m,ѐV]RHA)dޙv}Z/Ruˑ:ڲO w4Ww`gL^.3T,ԯP/%)ûNq~I)U%2=|, .o0Z21&]?V'iؠfD?*QBUT 1\. T 6*ȳNŎzٕ*c`I.E-6ZYf`h?T` OǹOE|"4.a݄h:"An4S%IѰ3׎bVk"GqDRҤ( #, d !E :h]\LܧSjԱHr1g zjG&S@s(ύ8 _Qbֵ'JR7Gl@xSEHYhUHup/]Ҷ1Df̚ 'VHTdiWVF)Qt^f؂>t^`[ #5*n[/0B{ `.`,", >jY,JX߮܂йfU  ?\u;S7>&Mz\kfJZzBq}:UDg1Xwa h}ƝNYf1Toӿp팵ESϢֽSj딱D5a-2 x)*9.vZKoBʥ!,D ފ`,Jf-3Ͱ 4!dG"W@ǰou5y#@ǜ>)*;ź֏$pOFs#5k4&_}{(_P <^.0,4ZKhzB}D}^t困|K13,2jt8_ P"! |oɒFA*ʆø?"jhCx:Р̗c(jb:Df{1o `PTTսShHA݂hBG>Guob2xAb$03& uo_teJ95Eυ*#WuDCe0#2:% { bxEJNUF, 3~RenwYMGEzSϣ+cźnV&= S4Bl y>>ud0`ԝx酖ׯdo }j)T:$ BB/J .`AN!W7k)U)[fohHMwfXL+*VZgu/3H;݁Q_y{y4!lDDY>u`=;4ŔDD\:8˸cy ".P;mr""׻=[3 Y{xf!(XyuDZ󡻺\FɶwvDDEͤQ:YΟH1~hDDMI_"(?J7[w+H!Jul#ƴѐ C((@{Җ6')['1xqVp]ݞI! P=GL3 @{ԫtze C"},S#[;:@K/)YN/J"'?>Wزr:miolp BbE nס@ `8;d n.h,>.>k c!s޲:ktJ]JJ1'q2YʤoCF"W$>hT L*\lD, C zCO8 Ao9;dΊdåz (`N6İ(!O=mډҮl~8o9ڜOc`F\Ȉ ߵjowJus ճy >Kƻj@0s^|!xF@ꭂV/hY Ru|k}np67;?7|)+? zu4[h_q\Kg_eֶ6/1L4nNڌ&jt| *Qu͘miW_IˈXESo2Ĭ\F.>|-г6n{)sHVȠ*^׃4u`k+@G7 >A4 @B난0I Wth @A{xo*3!7V0-n B?ˎ>]~+6ett C$u:bea C{W[ed]ԏ t. 5Fո5?e0D@|ϟCD"J=*= @Y}EkxQ!RKC^)Kݝz2p}ijUv/ƎHD8J>:^ՋbȌ.験@~%P|8Dur_L7>3GswR !9>ڦ, iX!|@oP[hWK? pJ_7OK)$Fvg\]$rB,&Vqֲ-mLxd[SjpmҺ^~jːr[ة7!ýzC\`A`O[V`敖mXWYXnx>SU97E!%KYGC+A tA1;-Q:5aqS֗r舃{N+p)gš/>9\ !N :/g +jotķy0 |j29aӾf1C81͠h.n}NSܱ/@D*M?7IEO:I;"P9 n y !0ao3JN, YtU7wJm0iŪ1'bU;n^stBl m#tF}AZb]::ߝ{qUoQ*$ UsaK#wԅP^bE sڻN]Tu _zחcRCs1# ? O,S6²DlR֠ƎŜ0ĨtT0Twl}y˼S)UOߚdP/tSKJ8͝Xsfzv Nؤ-FjC\xƂ, &'4 Gz))*Gwv\F_䪗Qē[`c75 vxy-~!jڍ=/cYq>ɂz3)cjaIɜB*kr\RjռWDz@L ^M@-(薧Rq,>V'hb?^Te!>Zx|eD:)_|FEJi`57Ry~!~ ~1ZiV9bYzmzmlYLm&=~UF=J_<ZZ3Ҷ0( ^+}6~i6r-ɫbu\kc藁>(:;Kp_aPU O5v~`֝I{oeqw^$\݋9"@@ ksx#ZH>؆2dk)hLv`ǩI⴦» O `}WtvGMoRenH@%4iiu|a{u"H 6᜿jGfݺ=v,g#+^=U!fwERKY:I(_!в&Z3bc<-VO8HyS^ Gj}Ӿ7F-֊?޾oubp"5yf?JzG]<w7HF(?x)*7I ƫ8mxa# w⤱p1K-3ꭌ{H5bٮ $;O7p(Z4 0^ HtH]۫AvU؃2 !S WAEc&l0nK;U͈#V~Aoց \y}>[W)}}بi@~.&TFS0Τ=ťbku_a2`a!P8e~/9.njJd( zNDqMA_Y 4hSS:&b3`}Z 4CCl+PbvIbz=]1'4 B9p0FP*h1V9LهB?_Roh=g1 D/7`_רD`(%JBS7RLȌ4]zyI1md w(lPz#=azd a*!x؄q࠴ 'Xm6cxnÿT .neߏӱg,BE;5UUY QdbzkrڦxttTj<](~L<i0stԧL:(M*M=Hfsi<V/|| k7>jWD شN1O۰ҫkf7)&Y?fxhA:,5;\0#Et@X) tvӓחS$ /WńP6bIJU"pHHG|Jũ֫OGCІ?tBWc ӱ=/ s3d$ۺz^.O6:ۗ:\Xx-PO0bXqpUJѵ +kOYo̊f{vt5S (d dR}dl2&m }rLImd0Rz*@2FDL̰G2)t` 2/ì8!O QFxțxQp2/` d+D FOC#>|2 _o .1 р&LepA۰@R Ɋiʘ!1`2.!׽$12# Z]CwoOyU-tމ%pHkRTBn h@.Eyd0:(TbyrQ sx>$r7ϝAELʹGJdcn zF€RͅZ쐑 TQh6X!L|&XS7ʆڬ4PΆ Fk'lu;(e?;ci2b8 &7X8(]F:/8?qnB"*^d PNOcEI樆B!LYX`7g#;+tmZo9]$ DU?Uh}-G v&FҜe ļ6Jhgy*(m"5Lu[8O%by WNVOxζm̗#PYhUAL]aFz;W2|۱v| n iOk9(c叇zTHPd*  fxx[2(b ACO+A ̏ L =**{ei)_ R0o<`_jwZ Hf /yv=ҼX;#V(DA.GQ"o܆|ix+); G|F_c"yV.4#" ‚w6\wA/w,, oz3"3 &B;"<,TDm촄+v ^yP]:Yzؾ fv$L~OLaPlEyb*lm>!s-m*HuJIyzXr<2'?0\9 ʗ!͞ S 8O0k/3AA-r4MT.R'~Iuw$ݘEJ n} |"#/lg@,%\זXVmdjQua$-~=_%j,ZlS0"2^Rv3D;pOGQ0/ŨN(3#oěFJ42Avl`t,}LAߐn#,#ns{:gPς.V8cYaЪ{ƶLBAS|ZixGwL@`Ũ_@'d 5<{2 XE̦Dop!8 -M/i`  Yup =!3zBKEY*ޠߪEi5~ U1%kbbd(NMo.SvەfbBܼ 1&Bz_1كH mwӕ\{0z2Y8B-Hl9XVؽ ( Kg@u+58@~Tw))- {RZ<H̕N0/@P*9檌6C]Jgs @.y,P1 }"O\age36 =aem[p#C(4$0ne>AR C` 51dvlwyea4w)r弎X@N`N}yT )82kD0w5έtd2TsĊ(FoǰHl7^.XvY\3VlE=DZqD.$Q:wZ~YlpVZdtkMwJo*TI_T`SCU ~[i?#ĶP;~E]mSAA|ؙID.9?rm:N&Q;^W/K^SK*\$ cIϞ`\VyS[d]LBQJ{[[м3ms0-*`m]ݓr3P eڥzmYe}_eF\r>~]=n{q,]Y~Q枓j̨~( ]d(89\eYL'Do7V}# 羈zJ^(*ι%d7b~F'3KPQqu=nEf%+m_c Ԝg<ЫI3UY h{b+ib$ڣOs}uJUd]lwywyF0j+kz c^6Mf3mnGqgW^g)Ӛ m~zU"9 k.zF* [l# _!f\Rj1@$@>te-rG PL&k?8sbИ0+?A0hFa*єy??ޛx}GG|wZ;ԷЖK9D17=,б$}ƽM:miw>KOcٷ_X6@M[¶7wxSul aٳNA҄DSZ# ſ;_vh BY1Tʑ5嵘PF 3m73~>1_0z)u 5ֆ΢X&3m==z-aoP1HQ iU. ).u F]݅+9v/; 7CR?Ma(EW=$rZFO|` P5`AXqZ}PGRYxL hX;P&˕wP^)Ji.t sq~r _sS9#fӑd_(4NikoD(04_"lpS쎡0[_nob5kB@v,(0$' l+L׵a`:74H-!Oc,}"~RD!0D'k/8 OQ$D䘢bMY!љM”3XQ7@4VAAdױkQ` wsY Jj-O:zkS2l&98S{<$}oggR7[ "OVDSHOؓJ4ek4&'w[P./K#} 4 {L__ʱdx*i1]^ άkuje-XxYhZ_vsE!=|=5!_BLYY'~RY>[{8t*u/;\o6"­_J{XsQӢ#+F3-npw{P&y|[+fEu!Rg9o%`W}ɡkB+qZ&ݺVcUVQZt *[jݾjq?K垵m7In!k\|}ћLgVֻ-]()ȫ(Â0ϓv|3 )TYpC22\&w^]ZaF#TS6w=j!. 2QQe_Nv˹.j8 2z_їCIS؛WXS_\k8'xGdc-RhD-4Xޤ +i N<ԡz4Ú|֑LN_os*(t`¢|HKOlq'T(~yh>}T ǢY-"dFgİNp{$/7|Z5?"$NJ H0i:En[R#/ hO!xq8GrAkxR#t|"~O_!t'[~ q$&Ae*acf7)6ڟ hzvzg_Tml $͛2T&gTԈfU1c5AS-i{9 ~wu3s8Bgudu΀nAXד$/@KxEP8A[9<_f<('~`@؄nh s?c*ƘVJ pR/zbgNc >0gקv 1 VrC7+>8T>M7clQH%Khx߾bJ!tF-s=OYAW +"ѣʏÿ,;*GH!G,Dǡn[ĐuD1 'S6H,>K^&VLa~Rva5-'Kޕ:8Û+ {jkgic3ߡʮ!=[慿Z>`9_Uth%@܇"o^BzFްH$v7?hWnKdƊ[}1xcIߺC1Re,ĝO]"\x"GtztG?N{.ʊcF O2 EW[P!Y ]tSxPh_*>EZ?{p!P @˚}PW^]Ʊ>Xvp6{ m:y_C/ !ߓ+X ~S/k|wG2 -@2?TCO ]qWJ h='ck=|)d²l0}M_}Yp <>ek]$TE"u|j9ph36Hޣ`t4'=JWXdM’@M7vKpNَtWV)zOHNJQ_|9Cm6*>-9@ <7"3S};s~FbٞY zIP  ?^zAF8s [|Ujz܁伕-Li>X5NTn9+}9xKj>-#^aԬƦ<믨SojvS$FoMc ܀`n9dnb 4DJCA$mam;R`⦷ټW2%Xl9,Fzhv%y+9>dKrcU_̈[FAPQ|W|[^  uRD\D yN:aKZX걟$l"O>r[s2J>O~]י*_>.W" EN]JlX&Yn2bWv}G킦j䂩tKo)hIPQZkQ-ap\C>q^;1?Kpʂ,-ʂ% bm !DY幏j P4?7k lbafrNhJӳzZbg3VflYAɟ*|! `ox/^[W6!![Y`; 6PjDGWٸæ Fp4` S:TAUM72$#[+b7l JљRJ=_'AS=oz\W? 8/Xh ΥUWoS/e Y+˶8\dkyjZ*i;zk8w/=Þj>9Oe.Gx2%>a3`$x^4 șBP5ѕ<#Pk3şydzl% #" -B|YosqC w 5 jQ!gMx レ ,eo|X~ <6&)v[(J r?f(fNEŔDrhj\zvhjj# #3H`/]"\KhQz5a`RɀBEg+*_*@ضDd&Qʮ%Ss'%=?|ƫc%iCqg2D"cG?zobhR =&*(yl#Dێ)ZSGGTO!zHCǙ -н*$5dR2>>ah{` O(Ky~h mh ]v+vE"l Mc2~M'Q>fpBhw#ߐy(8V,*w.U7Q2KGw$; /JÖxKkW;ӃZ$ds+\g\#5*ɽ쌒hny|j:\(HeMe?"}ԥ1P8;/H+b*Y%iNlI&~i$wxHd +#~4٠{t(|]"wLjI84 q| 61-2$hz%R1Um7^,A0$G{XĈ%->DF+wz4HѪ\hdpgtG#=F+Mri_Ã*|ʢq]B\IrNɼ=)j,b*7MW c*qjS ~vMh((:C35eO QLy5aټ??;W@1 @3L0o'=;֠^9}Nlb=`~_Sk0dM򋴻SD fSqJ^(߾2J݅зvw=^5 \I 2TdLJ'aODbڡ YotT0ժۜce 103, tsC?7qY, a ;%B3]zIqY6#:2sP|qS9DϮo]V'S4C U+mEm EL'NY(ҤMW@`lI7qԼ@\F`ZZ ]rG?TPioyTYƏcţo*vjJ^^?zRˆ5bU>MQ @LR3lμsh[0=K?%74c^ g =o0EQr_T>sU >1s"CaNr{c?ya80DDPL.¥_-1Y>Z+v}xabS'(ppiD>@uU긂 4DOAe/\&|BcGCSnutgPBRe=? {lbݨO!^~>Ao [ uO6j5 U^DPW1:6f4}QPO-׊{.`TBim>0cHتQjoٱs)"t``čDS d7S߁Zz\}!'<ͼ-ɀ,L(j)gizҗR ĥVJ<-?wC8 1"e3PC $i+Å f <60x464p$lD*q?7d)t<bcDztEKdT&nlȠw왈<$K>'rGFEnRrHj7;l}夷RxfaH8BᎂD< #K.Isp1;JV%~CLP@ e>T~k0)U]Ƴ8)ק؇iZg6==Y,H"8)^' ԛxKAt n,0eFuݚ .rl4鮆65^tv!q^G>A [fjBL푖3%&īJ!"6,F8t i z= {RFϰt"<^̴ʳyl"=f5C+Yv阘?+!#r Uy M7≁鲭}G*u;^è&9\ZbP&c\c s d%s%mcEHgmR(Dsyq5BHRbjپT`8S!֕mʸ Ϗ޺xט %.yJƱ}&~H\Jn9.OE 8i;Dl\L_HHn4ϖ0 g$}Q\xkZo&Ёr&<_ެ?]""9;50 S1xCsĨos'FXhJdU/'D{(7x+aN ){3EsE~<cdi=O{bvkCy_ManČ7ΤwR+AH"lwOԊ]O ǢK\5h#J4' ^*DKcDg[E~'br] `؛25D3.(2j ˪6ȷlh7W71 ^0OWӥ_C'#{p!$M|gLKrp%a%SGkT*Ue2MQUEl#?)^A' Ϸv(&88U /ʽ,bRCW0Y!mJL;)oqRdmTf>&*Z-¥̱gs z~Ү\x-tf:ep(5ڴŊ~՝|\XCnSyP> \-;h* :#`2?>Ƿ4ۋ$8^p`y< ')OZ ~'맸>q$SS<9vD;ݬj\)YTw k8vJGբ!_HOáVKxG2@Ư͊e4Wz#6SIrQ/j瑞 [(->7\嚰u|Z6q+ >Z%$\ sڹou:V(ČٍvgPz.4q..r3^i- Ùa֡jHy:m/Vkde=OD9Gш;ei2|y9yF*dX=-ԙW$JUu>]Yb}D{E?Y%sGbJ6q ftq@|JFeb'܏YC$*UTh>=1vmeUhGdUI`RNvV jlOj@gϿ* yNhOn~7-m ksO~oW;űKF(ujD|SڛϽ@b3^^f$G.W%.ʼn6wfU碵μC{ j~g>#I?S4vEKeH 8W},?34*=5$:޷W|ʅ!bVW7{O?=AbrqzCS)jKcځatˌzDh*;ٲl U2h*6n/T?O򸣬Њ~f_ֺg#e{Qv.t=\u=W@_Iu\SpY?!ޯ*~D7pPޓXjs-V.0et6cx&ۀ^GhgƜ[`g{'PeU2͵Hb2g%G*2\1Q{ۤyV[n`WER^jF6葊ިˉwW3&qCዒ@g!{ WWڅDo |*vW5eeuc,?j.M V\T</"zWn9/!1Ʃ:l3 ߴ9m΂^֊: @Օݡ$zq;N67jn<{ؿ%?~ת6luR1D]VP/ҡ 'T)@so t~pd40+:x,13,jXGu6t?Xvn.οo$WwZu:Ğ:.'d@(%1 eD{~@#\)bѧ1EX3事nq`-eb|u-Z?Ncv]+TK*I݄M o٨r:)]2sR~8+6}n|'{@vXEї>FB?vu%~*tFE$#c s+q$bN4D: =TY|Q8LyM@0#u{SoW5YvˤъԪ?xy~eiQ#bIuF`:Ǫ-#czϟjuZ*K p1y7:}fU0ZR!J_W0iuw{)zxO{'t% xx|-G/ jPE] sE&bؿ, 7]{/Kg(A.߄iwh%_*^OV{R{F6?zUUֵo=d-n(xqR5T=f5_IT0+I1tJjE]k o}jLU2rxnQXyEQ==͠PX,٭2ZLpx`ϕjO/b*o@mJ!MEk$qOJ@^!|HwP5p<{FҰ2WMd,UN;bYoY/?Y1ꦘq#GWzk#Z/l-h/!~}J[*18fDO!EOn{Yhҟ.HyNLtq]|Vs}Ce:so$V_* C1|X% MhM`^ !Qm*ZCӣw.Y4K*)>҂<\qj |É8na{[SBwA+[f}jgNw<FE8^ny.=HkLfy:=trw@Nso;`ؘ0IFӭFe8$r<, JbǕ= K23w B$ >MA `1=eGYW#׳i=y?y6ĊT ~|KՇ1fRbYͷlR"'0,!^ɴZ) P#%eFjUO "VFS{w~Tl)V);/}֕ט; p+:][%V 1xr5lI3R2Hm~׵̉8N-1i%łaJTvL7օ>K`0Qtb U )M3ϠPDnb ݦhɧ/x8z _tUd$z7H8} ըGbnj;و L&a"a& }'?O|iVhʤWr?hb΢4痷R)΋"1g{)E SNtsL!S'pdRpC7V(` 72=Yv%CIZpvexa ˸>>,AL8U`D, q#mk]!s, ܂E<|/׆YȨw»2tfƱkjZ(\cI~$,Z…ϸߛN@'qV/ՊI´pZ `QmjZX(<~T_AgJ :yhS=\msCX*q҈7Td$6W[=q>q7c`6^^u[®vypQ0m8{tȆ^ozW"t'ZqT.@=i@]RyP``5] ֶS׃OgQV l㼠2674l*_/tH':MsYց+Xh(^EU+Wil|-N{u.L2JJ\p88U 6˷Zݜl2g_xi"nb,x8CEU '@T0'X~o;UƤ0;i*8Gbyz ?=`_޺ <1ɱdIx{%APV*LySxoLm7ҮD_&.8P(krJ1pFqh؆ ΔHJ= 07!YHn{@ev|׊Ňk<\F%XIp\2]"m"Q4* 2O匐$pPqO]ZߍjrE}t~O?gP5S̮Wok䋎l^piW1F3XsK֎BƊS2tJگU}\0!/\/VE7s}QZ2-ėo䠰xE 8k:Lѡ5),18YW:=5 !Oe ["bfz2x8h.4B|0gƷp.;f=ye ͰgzDddw*Rv] E6\PFln(=\WU^`B (ʡqctR71a!_ 5?}"&k揙cT/K2޶\sQMcCu|"p1GGiiƦL Zk~F(8LnP٣2/W q|6՗Hy'G؆@-L'8aq|,qs]GjR1ue x-nfI2izDD24%؞͸ei=D1:U!1o0q[wu:.1/REZӛZe6npPUQTɸliwjX)Sjuj? Jpߒ bl/(%oyU]EcV uUf$)L:cF!@`?Z?Tf>ue\/dž삅#nd??FG@F K|([_p 638HZa|=IAgU{%wZD V>uQ;{,.)뤡_`aK #M @_)=ȮG"ȃ|D7|gN"̛kx:5XV8m fPLSP\NW\f=Wh}Okv2pUw}PSrcuW@ alBZ'3d P+f )Џz.H+ { RJm^'= v@ nW5gl=pި3PzQL-ZmگBPbxכG]{@K(C`u'òڄZm愴G:R;껺B}u"Z3}?FG.IM{E)Q!<CS^}ԑpZk^#9*׭&:#rJU}He,wY ͦv}DÓ.&#Br;:g@Yid|mMb?o ,H̻1SB"aWH?/qQÁ50P=nf`\x7PFIz-x#~+gDO,Ē\<=͘`R#W XY"ejO ,,D :fR%9dȑ+.Yb(:(5"C~=#`P W&w5}5>W*0IkO6kDiOC0g90C"= He1}|xH(=M}80i\Q/qRPE N@cv5Wm2-%gy(yP)?x0kFq sŪ8"%@f]ʊqS 8D4bhZ AZrTBa桉h 5*g{z3ЕG'SNU-/*\@mW;\&YrHp08ݘ?B2񩏼ɡl>3Q9ptap/Z_ ā碍TGnਣ6!)Mp{4VcWtuD$לv j;uZrR9ۗrW!wyVۃڬ0;J º o@_ճbJZZrSy: 4E"P*P >5Q%z^ P9B;q[C bL3FoT]f?H'A[?QGU[eQe?h5YMlU?]Yz, G<*D7"k̆ENsp$w)Hh; ,U!ExvY"|x"љo> 4Yg!.93;T=WYl߭I}קAJܦ}A5`KYkcMQcQV>%p;RDRz?Уپaunn,BwQm-Eؾ/ks܀-$UM;mݖo%;GUO.0 4,9B|?Fcߕ[~0ywnLp{Ux|HD<t\k;aOUJB%aw3O fhnt.nSfAy/v *)pF$="-Л]C.eMc,+}sU` CHeXttW{6 ٴ`c?B \|AiBP.b\cR Owj>h'ʠ8חPZ3Stb )P(H9fx$*CqJVbh s\bN B会&*_wSꑞhdMJ{Q;L'"*/ $x]ǒ m& O({=KÉzZ{RYo/t%,>*PB/{(|=d쬣S}#/Gf1w3V_ q[D-צ)`L=\{,.JRD3Ѹ=srIOշߣ s$ߣls,o?}$l7h հGn=Uc{{Sd(Pc 08sNx󥭶)"W{OEZacAN+ sX^@rP&ԧE^7вajjd0bs(X^ rَ |zqAZ4ZEjь,&s=h0=3)vk_JC8mB.ٌ<]$@Nm!MI-gld#%}x)b!)ߌ&ׄ 1h X,pFY)'sˏ*PM=;yu#$I.&ĵnC)Wr<`A˧@)3ݗ)?I؏1XrZp0-vA)2/{G=&tdi[By_(M^]PQ׏Ń% ' oL Hl ޯ.pwDCkSZ.Enʼ9̟y*╚GC6):DD\@Wh|F]#0nvUb؂eU[[Gϰ%Ry}Jt2bZԽZZ uK BIm1Q6e/s|=ۈNMB#Xqs6^@w g3$ @П%1*2ޓts P'؏) 1'95!HM˽sמu 3*cuEv "F7e*SR/L,c jt0}a8sAUZ9K t9ώ|q[&O7ZE}4|l_̆7`>{MN9nh ?[Bњ!xBB}+߹VlO%}O@?OGZNP+3 H 9' Fx^.m}үq_nEOvl Np  @R_4Ȕv#%7?Kbō{iB*L FvjC4;ʆ[+n!0AUX|R wMT2\>7 KLG3oXYTv?Xa}9uu: pW=3uP C@CAcC=-" 3˓we>Zۋ뵇_"*SMw¶9972i9-^/,\ΦQHx! eki&D.rl̯wغy;0%]go5]wo3uo"`9cBv-%yh ]JEy9Gp" Nxݥ;ˑ!p0A1UӤzx`Bk +^4P|tom|͟j tLߟu(z4Ledm+Jˈ:wZ Mx: /H+z_pLa7iMƊߑMu Ǯ;[*P~g2.`CiSc^j(*w!8ʱW8_)'ʔxa` .ŶBP1,J\&"<A)y=EG`sAY_f8˴$Oh'{٘?:i}pI.#|mBV(Ưbvu)I2faʎNX/pU+\[Q-GQ+-u7Dbx.n|ݬV^sÇn=@݋X8[jf+=菝<yo@llPo!DaX?ŁQ r[}}u-+ҡJP3smPv$0 @:|*ѮLZ8*M1rFAe=2IՄx1@ & a+mG)9zWv!6<+YgQ N H%?JeoJw"&8: cc+Q<{c ', -\XU`5"/(&w۪knxadU(t4[P1zHZy˭q{BXD) ߮N%"1ऽZP)L4b,hm B,CfphSZD@AN4pPf<:V3`Dٳc4EUSRF\YdPy׺h]`%ZWtz8 sj80I4UvIv{[p?͘\׈yTE#d"±2f0PKY T@V Pk2;;¡ 苶N_*hOEQb,H/A ; #OR Jy}s!ohsĈäk4%ߧ;<C +:l=z+'pQRqhK" ][.ZԄm=z;/UY@2+a0CL {0p v&oH 4LNաuTas.j ?5і4{ `9Īp"Aa5;z,zX_b kߗlga&Xdx@{N ǕO.˷䊁6#_7~#Qu@`vhFrY -HNDb~y*D?V)zqy̓\WE*R{^*088ly૏a;׉zÄ.4@>(_-kg*MفYp)AKO*L$Vw*G>/0%@ʨWI\Dy'^]LW*NhdW _@vh9,j-:C Ϯ>un^A]Onb%zxd+%=V66új޶3pY\6/Tt~Ȼ^p)N-k(Gև4ߍ+WtARg4Ho l%GU䳺Ԩ%,(<F'LG3Ijd됴w/lOmķRqT:Z R^pp@ק96gŐ~gk{M MCIFCR1 lR"S = 5gS `t{E>{ל)#Wl3Nv/n#$r'D;dxa$6 3|yR~`v1M]3tM۝Fm'T8O j)hRAk@| R'qi ffი>MGN@@xo$c9ˑU~'`!:p:g{%vR;A&.Ma zO_@;[G 2dq,\\ڑ'ĨGHM9-뗒| j5𲠫XkD!ohgz6 , @O'l<8FBu w)+IH%J8|PJvg[}M@x) 5f (Cmln˜ h ?'7Wt\|b6_p9x/DwI}D@9|RYC?VkYg1 sA:[r=!!&Ps'Z(iT&ۈy -C+9pٶYӪQg7qp}lDlL WZt3o¼TǐH ;HIɧ"^Zx{wsX]R}LZ; =HٌlqedStW!à1aÿmӤo!, -#]Ն1-HEq;ٌ[=/dj!W9y?E U:5R4XNL)Ң>|4:8%8"KزY>/x|"(/~}l?FV[J-N P8^i|s"th7 o J遒Y.?B%0|g@s7|*nc'@huV#KRhK*2giݲKHs%Pr A}7-ˀtKzEoHQ4fWmh_ !=MFKkL:e) ㉥js4d?IjϾx"#MmC,1XSܺKtwXg~oĿ, ~|򚭎Go4U~o^v*1l ծTW]H򀻆%]je|r0OjDEF{@#:Co缜 z)f)wXw'S}f:PvB؟>{/CMEzm04 _@oa.B_O JUaeV2%(P'I%zú93H"%>HZ/v<3XU;7av9'Cv E: x&.ָZ8g Ut.7Ձ_5|f;*VXqe grqAYc伝]rG,b1rIaC- u㓭ook'vb9|SҢ3f(r@~WpNH3(Ǿ w,X}F]b+QO ["0xwҹlnޥc6D)؇?ٽqfY), =M}pq8Н1\m= rlFh z?Dno#Z$.X@5J 1K5!ldБHp fYߩH_:xiG݄JOw`X\ F{9DLnpwxVV,K"`6 7owɨtC$xg+J~ ssiJHjU-/ iLFvidO (;hh[SBby鿗Cf,)DNiis )2Z%fd2^'X%v3йMy~?e{Gp88/ 'HZ>ë޿Q_+,g84ߏk?9zBR*! K&[;N!|bY]@3s,8 ES D 2CHNLDƣ}fIkmUG4YAq T!TӝSUz~kG(zj֦TILZLy4S{C7(t~|Lr"j NÑM;7J]Ic^zs]A]&k(:Wr|ܴ*F㶴 Q͝^YmCFި?K}$#KȃƆ(LY.(g>Q׍n465v/'?uߤ&`z;$[>wW(K [qrYcyJ]yqS = ‟z%GtTUE' $EKhM&y'r*迣}Jن` m`O?mk%?5AH18Y:0evY' 4Ǜǐ?ZßEzk/FZdu/AkS9o\%8{<6;H4`;zD)u,е6NPc0"` 'Ebg+PӼ_68}o\<`/Cz _d%ZUEح߾cg/2y l_jn%"H z KsDg #@T0 8k0~MJWoլEA}DJf0k!@k<2U#ΤiF~hF% ESZ@ GdUV0>]C'V] e1.c& J1]tEW[@_Ԭ 5Pg*v,(SLʀK 8j#j v{ w]gmvXCG&3 aBD:Yw;\:72/iXTՊ$vhfK]XxA i_q0+NR)hagPLl@,x H( ͭe@y9 3cBVu2ڵRU|82"IdTr5&IʭzJFd~FtXlXH)@zs`b `l&7zLo oQ]jz K7SY0,^z<70NEտɫp0M }H3 kوZL d{g߾$8b䦻:Į}ۑnʣNѦ#+'˽/GQ6|R?|D1hOmUÓ&BEKgwT%YSY̌xv:J(D&9~d3׎y]fI3uvc#OܭhΆ*hnɁ-aqa8Q%M%˘&<-vyիRG{7DER J ?:d 8q.*Taey8gU-=

,mNiTyAN.;ٕ? xU m߲B@/jA")"R0S@ 0:X+ncZ$I}_S}_BMKHӏKgGCo|WǢ9_U*aa9mxGq#e$kgO05܄ x-koGAKvݽzK ㋳1HZs` BD֎Xΐlo&؁}L'Gc:ɯI44[p80]R@#w[Oؚ~UȪhQ׃zLާk+ h&0U6Ɵ+wyE[cim;3rR"n@jd$J#u }؀Iq X順0XwWdz2(@!Rkl0HvIe1o&zwXFG<˵n8`G4QUp>PQzSPU?zQWC补`כ;Ȗ,Txgce`O Ycz"tnkǃkCKX4U`{5 0`ǔ ޓDp$2sfD\E/@H3qYpF+.`כ /u!nAVxЀ/}ĚTGaC;c0;Rޚg M|}A]3@3 ^o_pb5ٵP3 r"X-~#IP 2 LS3!,x\GYكKdWaƟy8kl/6 TE>##d{gdW4Cѕ}} z}*ݝ6hrkrZѽ=Ȁ,¨іؿ,Cc c,`M?T\Tg j}40Di\flS2<-ЎC*9rpC_)E8=z| }oCg;- Y:NYiU=$Q60Ufd&+d ۽ePbQ՝jvIS$D\ HU0Zxl F1gG_Qu!_GX+iҕ8TPȮtpAIk9-8Z3f[9u}\0f`N5YcP.[~dgΖ!PQ 4IT9 $y(Ce7q [X Gi*fMtK9cHv/'.%Z.w\97:,g0Yߌ z5wi$5@HY3`Y&&|TS1ZK4 y0Ѐup5 Qoßj{ :Iw9 9gi`o`&|m(F}o!؋rtdZRvD~A.r.@;&b]Z}fd9}{{`lْ88?ɫƴ_0BM,Bpe$ovа:ʺcfxU ^ڰuDk>Bx;,5wN;ѭ˜~o%"uC!$󻗭sӲvs 5ٕc5Oj ` ݓDWCC}DQ7X:67V3zlq {5/.yOmڗp#+mOC9X%0RX\E) Tle,bɊh#RzYZd8ZH3=.cﶊ\yY>&1e .Vk[:.XVce[:UqNn1&۱䧒a&3\N1oa'34x|WBmT8*r x^cMօ 赳0"^nͯE3|u{8{z3ձXd3f7DQ[OP2% BWj0B7lsϠzvNt%xT>*?pg EkzM4iF`Z i=VMG+#{/w)=V8'@M5a|(som`vQ?LbacGؔ聈Otvy5y[ͦOixDT<5at0;jinsڣw:_zYr&{L'030Baز$;%͉Be&+6fQ7Ǔ[,Zq`P~+;c'(G["&:4?7_&l$t8LOվ2t=ڌn]f>|&?+4Yoqֺce: /h/M9W =sŎ2G5r:1:䏤F lS-}nkwHf bڬ&]iabƊMo^hԃ|.&pJ<R/`0 \}md \khXΞ\\f ECΪCVGUb p\ @>S@Uy25Agxt>ĝ&Kf #د=-"QPށ9R=13i|_ my{ϐR9u |(8Ǿa:Lܵ 6gt?L$%<@I m^\J"~j.ҡ9.ov$6踻dX>9};Eju|Bx$>/r8DNSaɴ&po4H £ ;Q(u8@{3z'B/]Ϙ2P"P K.8wXU|!eKV3 ;`o@D(P,_4_u? 1!b*7(E79DfWՍq׭u[B'ALUO6S^|A enmAyxy1]p(ss\@z= G 6X-PDg1}5GB*^.,uåvv7uSƫc`'< 6qBy*=ߺt=I0Q d9W,!RPZ͊iM#lBoO{v1\ _nGkBAj> M|tؔ]+n:]EN0MƔK/Ylj[E[\V{ή2iťM| '^ĨoRړČ|"R/|bs¦xR{fT$Bɘq8H 3mxQO~>--qc+gV@tiVPU$>I|vsC;s岴H3сݠ`׻ꎣy`x)#o ^u0.$l+i}G \ $b.2, !ܱut͖ `Dgp +;p"<8SMB]Z >SL+$DJ]_s*_oͨJן'eJ*P [3{y@ME|EmdT9[lлV6t܅!vzk0jFGSxsu?,*lUivئC ,'J 0Kq@Og8 !&P|>}ް?{U,~sykcY <>Djw\&#>e: [{^JD7&>gMgKY^`Ϟ5㱸nj SV Ӕ^oRآ !!5'ɲLpR3T&PpF f ,7"XRUxZ/m"taJc? ̂ s40?!,Aa(X(1)H.0/H!gwZ2.ϯVlAtg@-+L dQ;L;ဘp'GWgM<-)&vC=8@zˡJE/6PAY_ \0@u0O3q*[QLḚ j**=N:w)A|zGu'#r @Y)nW A¨XYo[ĺ [ VC7N(1ǿ}A2|>X=~ljzfqPOɮi \蝓5(DJ%;u;ӫ\ZmԊ4kpoR[}إtjݎYR Q8Eb>'-!lpV1<(;aYtF'cb.ُ9{ΝI$/SÜR/Dܩ\R/:%v# ;}j]7~83l_?3gil34L-C?Oc}'Ft"w[LXsvyLjI+6T_6HPڢVQ|ljmYJ8oHRw0B.zPUGTpquǣ{H+ a\2ƧaS^%Y Mfբ*IAaI`#٪&1q^ R+qiJAä*H2-s9F - ,+B'`Y">UxXя@BH?8 sEjOry⴦ȟc|6CugR%- % $02lV~cnbX|Y /QC"(tA4_[vJ ="nk*MHw#7#bX ,Ԝk.w0l%R8PB􍧢p%ު%} kr5KWb%E*-M:Sк'LZmu1^p=di#=Tzx8#ͨx9RU(2SY+[z q_.imNRG|I'[Ȯg$ؚ|7y-^j06H6jR`.r+Ǹ*['Iwv:ίb.(@8y\+GhLJxH& !YQ;Uꊄ`؟%Kk^UYd;DX1AZ_X @;G9Q)!08\uJ):0 ԉp8)d8ySլDȬ2o q}w:F+6hP+.U:&+}KLNy hY~Q&)IayMQN #;[u(gݧD s %/b2^Y{_G&K޲ cfqMFM kgW< ˃ e}첵r73v;c/:A lD'\Zpy N|7;qPM@ t 3/RUv[12؆n4hgdn6D ݶ]ͷkT{ ӿ`6;ZLea2:-34d7y0pLwl7%1H[," cp n:\oP) ۂ 9yn~B/!xf n!5Q@'!ζ+aD.1bTBoPS֛^C7͗on"U:v&`O +4R+h%hx=/ tv4<UZ'M$X0/kgBe6;tx<POvfRJ!0.} K1XJa#8'Uʉ!I@is.p=D2i.VA@[' @4UJjԶ!XnyN?-mP@UFX)nlY НycCZKuBfGAm5P߉4ʾG$44 a-:7on:aZ,(y2ťG{|Xy}7i{09ηb>R,vo^+D8[e!`>]5~oc2|j9i=K UDڍ?즣MJ"_ȉ{5l2@wb5-R{sUo4?5Q~ߕW^[v1*B=hX6# {:؛.cj|n"!la7bpPЦDya7}HXXB5ɯ= -A fb'д#\x}uQ3fm&]%<%< bɏzL}tlhTh/*1xl@o 1b1="^G6u4"]Jkl0*(}fp\t咁 h8 F2~eg])La ~IupEZBib à]!{z)b?F`e7@#T!٣zIxB*2WhzD ?LDgFƦxC{8`‡4 W@q[Z,q{t_hyD8R:p}p x*̙rkqf mQ~ Ugh- ^"2aoL ʃ$gXvvC^ndvIz5'D_~I~K4[xA8vPVvZAB'3Pk5:Xft1yƤ]GJ- 7^>oj u$ QP#]gLleͧhUy5hOq @!8:4J ]>&] tBdOcjJ>~#5zñ(E /d# ˷k}êى9{"%MS.w-B{c1ЁI\vB|*`E{ΌK\ Ya^ҨH.A"fDzc5";<4nݒؖ-]A 2ʝOއE.v=+cIB'{e~pa_л$$Eⱬʔ0ͫ z*'?b'phWZrjX>dBVK-lh[k?ڬRF5A ͹^"[cVN7=ޮm]!@yKfWX3zVFjNH[[Y^T9ρa"_ !Vf_ʳJi-T1U||[eгL$x=7 X]ށe>4, 4nBA b m %@[rhVC\HΜ{Ț+ 2Bt .rɳ3}lY@ gv$n Ƕd@=oaXF^y(݃$aO jՌ|-HWGniz5%H`T1$@vÇWarR] *Uvm1mYsY3pK>ں$LtV.y1$!Tcn9 *?G2-by1nnad"#Ğ'j6% håYfڽѲ`=jh圡}=(Kc3vT|cu8աl U!L? \F_xx[IOXKbicRqboZ)/6EY0l*Z6XO}p[Q+48n@I_A; pЯ:Vd }0*Ql%ퟮyvP wA\|2LVOg(_. ӜW[@H:Jf Gs2,;bEVhKyhk-ؿr?`K7BUE*?vyE6\̐dn('3da:A͟t"mx FbϠE_?`$-{[q˧ $v.踢* &4QY ߻5_i_v(XIYߐ`iM UM" !e@%^n᥾{G(b2B0ըdoRh)wYа kMAvg"#BLBU@F Ac6,R3A+(bM7ax'7n]Quw'KnlX{udnTd_(e@DzE=Ns0*;gy=6Hf'Hv>5;} tS mN¬C}vݰK`2 VKÌ$&x #y noaP[8~.#{jqh'ț땟O `W[Hv< ~=).؆[w~_wԲӶ8T*o†&'+}57\zrPFGQ9xEs+ p!ȇތe̐Ѐ(kxL"N҆2&NkHLgm޼nA#fF_J3 J@6h7ij_b5x.?-o/>G c%nt@E4P鹄pr$[4<_:'qoO}ᶪ YZ;x`;V Ǔǻ6%2p/ KWLCܰRCbz/5DsI5Juq4$R@8 k՟eэd4_V>ey'z/z}x9:xrV2oXՊ/> !?26q}#h3Jm^`77D 5u+~ên.92yx]°'FkH\dյ|83ГϚ|zKI`t|Ź@¨̬/ :X9+~Ǘҙh1l<N"MFXvp&(SrLFSIxi—O>9ף-(wv~y%܁6d](C р&򕤉$`Z0t4؅}g\c@DjY aЛs7-56@XAL>F -v{kX>3]1RtKi4fTM{:B)94dC?9  0DDTNQhi&EAoW,bqz"k> 0󑽢=1L|e`#٣ h S _KSpTTamL U]BZ2}aٗTubڇo@BdH@ 6 ĥǸ>gM)OκPli5XiT"(Dg $z%] 43{,JAaAZw'yghfW/a\>{,9O*?d t3QRw0Jo$9GƢnƝPEP  8 ]-FZb[_;@qqS'Z콱Mgޮo.i,:}Yh-LN{ʬV@t\pՌv ->7!MDuvA}X|@.[)6w~aEq_fH!sq.Mx YR(eM0QL:rx'r`Jd-dd  lG@dZl;k۰r.;7}T^u=>.g$LgO;xbj~ҠcE'+4 zNҐ-ʟi3_;#:=oџ[:Y997v^oCַ*KߕfnT; M0q f\BcU1OB+}} u6JԲ'7E; ' UQ"ƋL34a#.=ZzFVS%U6}9b-xm:i~iĤx<݇1btŀ{R3b/K51!^qj3!", 4& . +;'] Tw @s|fYϮA쫗5| o 2s|?5(ʡDkմNx.3lcU NU98,⣁yze`FѭW~R0 yЍn~eHO?])lUu`7$%MSHnLGF\vftޒFT+Oq܅@w2a\xio .oH01@Ee#Ö,nAoڜ֡X `fCd%!+)Z#RXs僠S_,~>pނݪo)UiVޱpdŖ p8LܲFE~̬& K^&7Yz$i@9lA] //`ƅJ/ۑlMTܑo>,u7-bqiWL;_bS~xwBp?8%4 / ̃ȣΥQ|݌* a{C hMTl^Fe}E,aѹs-@P͋4:jwab@'KW`$"懼u+D;k7z jq.d!Z mLVf[eJힴ/7ZA)KY=I]=`ِ3՞YN"hTb~'}ʚV FGx3#Bi%n-d5p6<پ#\.s}P |KFL-'!K*,I" ҂<2ˮVX@+u3dL3>48kiPM.o*+7g|ly(jH8 =S%n?~)>ītןH Y!U?!x\r2mڜl(ѪcN۩ JglNy@C.9 ܢx#nsy1ckWߖ.1RXq.qZO <;ĶE}ȻT'pY`J+p =F Z^d= Cw{)o4>;'VtΏM@o!mB1\ ΉdRnS-yܢ{'#JƜ_혵͏߼0/\g7,UasPQ% .jIӽjMΏՖYcZ-m6sB1XUj|g}P2(VG; jCPsbwYqJG89NϱF\CGᅲb:U#9m&)݈[Yp3yakqF3ʊ˸t)C9b`EH"$w;v c3ERΈt }M$y8jŁY*`=8tr->PxQEP,?D$lJR;ש1.aZ.κ^z{ya %+? uQ\|&oWI}tsg7|jxZtbL?kwǴ dok0#^0v}Tda0BLDJ]/VNU8m6z^o*{@32 cF3|Q @Xc1̋UoJ1|K**ЕdDrǕ1]ݏI0ɩ_j<$S6LJi[.;Ā]f|:Ob|CIR;=PAAx(^d q4WZ 70>}WIUH.$"*D?x [zx]ôoppȞ4 "0ӯ)pK*ˀl)!j#]LHlO?W)HPw:{M!4J 0xSkj%Y$l-ߔŜ;#&]4 `cm.VI;[uޭP~h8ΗF" A9GMx! ,͓͎?73CXJa[р:˹_ uQ.`. Kz:HIX=äс+=b% 92ԤZp ӆ<5Gy1dl@zD'[9 gN9DFFvkZ8JVc lݼzA)Os$!Fhz @+b3ljuvj|-(M=`e~l8R@ʺ`r(nd!=_ׁ>  `{ }%EX%8ZkcaM&b7qS?VSrI_N#UkS$ި%f@[|\] +  ⺼aLD[܉յRx:ʃd`1ZG{-B0lл Kg.|P-,(GHzϐN\ڸ^jkC8ԯ6OA|]~׋lb:yEi8ce8f[ {^o /ڈ MD'LBPeg V)W0d;)@m5A-ي?I aN΢^ܲ)AQ,_rebBSB8Gg0c.aw tD8"HV1,J p4M wI2J3"J8ί}e#鴂+BVJ|f~KJ'i5KQrmr [ Ҵ6 @L&_=԰;>Eng`rѥ{Z@j{9lAߗI_|{<?q<0qrm9>ylWN1{EWqnAjc^Eu!#XvZdTA";3rD^8PV B*3&^UQRƔۑM`a"oBO+{Cr9;&c5e#;B:3  l@N5MFe#.W^y.OJxBLPtX!@fm@rȟbcV?~҇/F(bu?$NL3_浿#/b)4b8T1Tf9N06pX=Q3#5tR)&Ո'SԪ-XS*}1{wD7m :ڗXE͢3E\QkWOPt GxY*KCJ@`PN9^Uά]֠(#e iyzNJNI\Be[!bW \bH};WqO5Xy6y,Y6%ԉ#@{!-T;MOPFq]rk.ZldJYi2sޞ_L$UѿQ%\6حpiכO[ۈlCl[|. IttF FIB~rC+<1O@PfxlehKsfZveSfےV&:l=7<)8!#ܿtǗOz<q~{ xs yUCU6Q7%v`mULy퐬%ov*uO[ fܒJF$JP[dʋNÅfz^ʂq26GW$gn/&fwQ![C90쁇];9~XԌaxMEA:e H Mvrk%b.KyT2YAHf/û[X(؟#>0bZЌ;>_3፠[)*N/:b V+YA3хIы*@#ng;dke (t\3!McOmǪ s%:Vjs ĩh,?71:%M":yjb/W'.2qKNΠ_QqHI9>bsAOi֡0}<6kz'*DYo >؀9H.*Ȁ i ^TL lU5VۏNtt*zj.4Hgy.IJ72;℩[n5#^Ty9ׁhYc*6S״3q`͒A[[(79lj,(G@kR]b4K;踕Зn$3y23m yfGXn,Td߹6( \0BM[Ws@ d9gmjXm7dH{a `cpU:?ڌ@5%}Jͩ^E tL/5g"Plh?6//gىhiQ+FxI{زGUk!DG mH7k`AblmvQyȩRUou|M 4 PzdR`;)ivj" dpn $-cR${$?ueq$3K/; QI"h5BoF>}Qˎ%ҤWϰl#S8,m[0 uҐ] +,Z@Lr}?ԿOcH.,N,|ZI>H@$ ɋhvOL{a؜tl+L#N"#G_1'Ɏ!ǃFCu!:g\嚶2l5E ~ǼbQ_zcgp͢='? ʝ|-#ur&Ceol Iuɤ?\9M ZTCuvTzS&:fVNR̬tC1$pyc}vcuyMqpeyT yqʫ^*MD @Dc 4'R6#xQ;cp*9O±U(SOI,g:ExnQÆ%_ȀA =pLp:WPmPsc7QIEM`{QlA6a"R3P}~ vl ʴ`s>^gg|Ċډw"ԦkHm~`I"L$>FOy݄`"s`9m@ egWxhHh:°|Y[iT߶-.J,CotX뾙兩'oP_%gv.p<ܳho39OX@Zw[q4y+/*PyLc[nfKIeB;KFI"= Ts}?! :à;1rקcN~CJᲂZp vG͡$7ҳ[MvH9MiBl% 7ȝx/BoJc:ĮtccHh(laSQW A`-ZCy&El\ ]S4}ԨCu<LcԸ:fG;G<{S5o@fotpyֈ`Ԅ_ UI PIkXؾ_&]>AϮsPr_\h Z3fC#t)ե!rO%,.xrׁa˘h#"3ڐ|] l5*Ma9h Ůb8/idYڔ2O,un/-̦m4fD/0YY8`4[J2?MHk;+cZ%WEK^vHZ:/1LXw[X "F/_]HY+Nb@y&A5o1_Z >u\`? DX.AP zc<cA@~HbJF\Q >^k‘?w)u%`c &jFCZitJa|3M3^1('M0Xf*K$V`]ؽZ)X~3M+#)֟_K9$%9>K5+\])),6D8O"nnvX"#*ia&duAghB)Abd<Qӯ΍&-=ɯi3A/2A֝.NWCdV-%;ЙwcGH0 V<ZgV; ;M߈X77Y&x9T}thVXqVՒILY>~*Xx)*"_N;r'\n˧Yewyh\ch"FnlZtGdqj j{Krc.o6)qR+_!5P53-W pPzl2=;2Gw8  Bgtօ .-71>dȏ+&,á A/ yi)3~Ś{'0{kiD1ಣkV$7)Z6*Kz8UYecLmB-8lAjr,ߋse?Ƈ5xp*[S3 Ns/.hrq,6$n2Mh{tS*S ?6]Ɵ Vк'Y?z-ĻD c_syLzw_@_'6pӄ}Í0 ,#:ULZp8#Ş/OG8u` R={@$Y wQ . \6@ (> ?JeUN!Et A6^St.eb~׌ݥBЁNρ:7ew8n$Ptyx*9{aèZ21AfQcH\dGGcWkLOHr i'#x ~V CzA|+nY<8<0{fSv1::^ׁhH}ON⻫ 5sڬ`MT3Мhz~_m#3|⦺؋xP=S&B$rkß嬤G˚>/,7wX֏I tI [2XPq/ksa#gO.t UK`0m\S^{-@_ kr3d.qH ݕ.Y_ ;PeV"c @at&x!KUNtM*'OZfCX|"sj ;4*-P¹ p?FCl4!^H: %~4 yis rc&ΩIV[$6Z"ɚ_a>CL"! bW.l I-BThZXV1<>Q1}5.R2+[h@ZF8E_׎ᡧ T6nNͶD .WcYI!DLbzaEGu:0 k&:KE^x;w/翴 g->'kе<ЀD9: 2JȖimH BIz$m!XWXl?`{\paOw}1U=IڢfTu^Zt6Um<;@%^~u:CpP+6  ܧxίNgg4Hd%ɳT̏Xd-z;w/4xC(J(+h4<ttR(n~\i L_Ga㱓] ʲ`ֶV9J5)P&BAЗ w˴Aӡdҋ8TQPN9I` ,r8bHbu|%39,z#r&f4ѾZaW}8JBV}x{jwkghScè)֍/{pI ^X&a&jdǪ<㾟];ZfF`!>#&q8D'쳐.iBAʨ/g/}s߅\Éa&8h<dl DSسґ?)A]A(uXQ㴒4oUlv'BčXk)ItBt#)qCym,Uxb4I `Jڙ>ca,Rfܴ:!l"u( r{G~J/ {< taGAPZ#- SյbÄ(@nsrؑQdE$Nj t.Rh,ڷv$W(CItƝz1Kl D;AH3Tw҉NLXp"F:u7YOuEY)1Zv0Du . -:pb:<c0B}~** "%0&x6ǽs+Ϻ(c!{s`1 g1#[*g42sFr샞@Xb19~uT;7+Dƭј>nt qMLe"DS 8 mUR@8 -gt_ #%E$" ӂQk1o#@?+к< nEz;n/螻NḮo8xyFC;85w`M2=;Шi< *stN6$rȌd޹we_{ Ǎ&xT~ A!MUdC>ĹAd.YɑC?H@IC,֡NB,Df&h(|6g 1r8LC4$}"i,3 mP>("I`(m#,Bl"]8""c~( o7P)!WXHPuuA#֐pBo>a4ők<^lO; yG'A"039+| wsK~>ۭW߿3Bk-<|\;WW[RSQۓP"<_-mGwE_ϖ^חWPv>%]щ}2Z^ZOt- "ij6;ԠAhSӟ Rb|?@+NVP{K^^U8kҺV%{6ݶdH6S0Z7dOO&@RԳY)[3an-:C5y:֟GsGJ+㌾k<:b[?X6im.q]@ jN$ 8V{Yf dOzVgr!2+x~a7` }V$׼/Q@)C ?!xFͩe ]*\Kx+`~}L" 3_]U+< h. >}b~ w_hk&f,fj~0FjH2U3`Yyn7UuST(nk]qlrS]@P=$X&Qki)vc&Xl}\. !6bk#רFhg{:-2ڑ׽~Yg,@<6>6^ 7~nkq;aèeCZN+c.%[h4IۂF q6Y@" ]Cx2RTX`ʔW36\*_Zj2t ,SFu r.dh@t(_n]DU5Tl^`Y^ϻ/L`̺E2vGwBrR]&RCi{aغq{Nû*^ pЎ°NZowc{Q(r:TZ{diz# ㋛ZRY6@:mM`t-ojqsba.0}M8r -O/x,_h t6wm80V6?skG&X*#F WŕaN<ΞPWF]?xjR,U ᖓdHR㨆n\ !KqMv5Ox- 2x2<(!4/"#Ȋ-A[M;1A/]=@㋄ĉsy/wĽq 'PW-`V=c;>M)ab캏eA>a9NIX޿AimK?7E(fTL!-am(&]bDڳj. Dž ?yt!^AT+[U1: n&ڐV>o0j!/d)|72B Y:eQRLͭvVCKk'PWp LjBI؂ ffY~~Qrk[⨅h ^tiE7??s4vo]=7E֏ӆ??dy? `f'-?wx𵺫W竎 䪂<0~tVU8|6h`˿Yd9ȟ,}V"cU@O=dR05MisfVAM(-1栃k>51bOkTw yK"Ř/UkNKrςo֑N"GQ,T.0B~2` K9E2[#߉_zz'r6c9z"mNf3~OαƮqO|4 ~O k\k!F˫H"O%_e=4 ҺU|bfc˻;>є ",ʵl4bGD1];e1*QG[WÕŶn\T"/@$U5WǹOX0qQR!zl?ZlW8Ax"߷fb򩲽.YܑeT(/@K]4t_6 hDtޓҿuxyv e7,J#/_Qcr c砃'SRxGM0w2fQ&&~x.$O@a"Y4/:N W^-Ӭ4BO6B\ uZ,K9R/S^g_UA =A`TXESg۞bHz4e(#Cn}BtE$͡BOKe%K1$7qfӟioT*A zo20;EGtKO((c5;r=R wWwgCThHr]Yz#@^L8@86ú%JU$ t)BњF YC~4z.}&Kd ǗNS!?6-1z`N FITw;[F75J1Yҋ%Ih Ϸ?q͛z2.RKQМuCX <xQ9,w&~blru>=vӧ03yE*cyAgo9,X[JAE|ۥot#%IIiJ'Q l=`gORƟ!rt0E"uYUOv.ƹyHC.п"g0IKgz/;:ÐPp< F@SYbm W}:ƾ@h n_xY?L6ton3m LƹN0tl4OLZ( !sq;3Hs<ާ)&"L{7*i P& V̿7 RM:D3:suK~bn6n>'/q풦@n9sj@ō[D-3FAN̵(@#.y-붅=5"Qn]UGn!jtlw]9f3Sםb/F1"ϽM6yDNA!v,|nSMum2LH)tbE !xz~:7)^>R|0&ZQlqRXHP˛@qsih1vږD!cjWmA@8kƑL?G4.bX})r"' ]'Q-C~?ɥVa?g?3諗ZBJ6~ȳb)E]MH*<@up4( \;c8-g"HXςZ&D-mį҇\"=ϊTVE~^p# 23${ ,GWtF$BФ @%'^HO!r q6/AELW5{E:' 傚ST@Áic T2_&ކf5^ Y~ JS7X+Bj#=0=!F싏yqm;Ѕ9=u.$X>JAWˎ8*k#l8+8gY<Ⱦ' yHoqJm@Q5f7YJb7V.OcFx@ЃL 陉2KgZSArTuTjaeĒ)+ØJ)bN@F|ji99r._sǓvϢSx񯘵 -.e~Xk g+'\'@m -$'ؖ uJT x!act~1 WN]a31~<U*罭^Cj+=ٴhu^w_ l d:ti箥+ x?elxC$gƊE#ԨԠ`@4 `FHKgDzXv9~_Ul: H/s6TYMQ9}xfĖI6h⬕XvFojV/pU|y^( j)_p \>} #}!HČ> MHGNFHP`ǰU0sP9S7*@`1̒E=ly[ν;  ><6_x=I.\p"Oʋ Bd=1ݾsH%pHubxAx],"Wk+0{]͘?Ubk'͗"lDe6/-NNhY}XLe>kNj=FWMNyne9kb^V+/ }<'tyx>E('&͵mH"m{V!MX:ƾp4ɜu[ĘhpXeaSTEA5: 5ӣybל`csj:[˾@<8\-RYa6{,(!^"^G(V|SRï~^VM1i\ߴ}Y\"uGhq!9ᱧsPZWOTP+zyN< _{:nb) \ٱ~-f.μzbA߫א^B .޾FʏB\N)+i2/_-N4 ʗǢ͑e=O{WޙDCȘϘ[ [}b/>ABJJ:vCeJ< nE&yM2>رa:F=HEڴUGUSOYccх([֣ҕn5В Ht$g(bwPZ]hHph3R+L+!oLk{("Q ;}!vqsQ\06jUϸH^ &\$qvDK~ZwW~u'lLǞ;ޞc7(I7Pa_]ƍ5FAK@TwO=dNu)F٫;RzC}:-A!;Sc9Ԑ:deBYDk͕ F~ ܐ o͓41]kM_[J vO&S)Bē$ Γ nFfzNN3 FA;O697k]~RNPi\Ya:c=SZJ=9ө OfAǂ$ Y}oTdd} GJƱh g38'R2|#&"؄QԪpE|$Տآ3yLuϺ mMU_tJv*LQ'@m d+ԗO[?FJeak ֆ >5 VU{}r@A{|)lQ 4:,?aUYa$~\rY? Ss+iIs Kƶؼ!@5ݿ1}g(4OH7+r|U"!fK6 4M]_cxS`Aؽ0JA;"UQ^ Lq.zͤzncwʻIx>(/U`:WzN20>6"e7?5\}KT[tSBzB! (b]13",9>Qw*b}{^N gy*~M,V:vE2LA*|/1 9)"=lt+m3nf{n|IG^C(ulCoڂVvIF_gp,tˎDfsr|*&HOlF|*Z2" T^/Dvn5uESIHtRH%Zh}f];0άٱ-4zU>1D4E`2J(E1(8ou[($ zbًV9 (*jCQ Vu뚨0Kl%YAf͍kB ( 'k=qm(`9^CJ*kO x8sf=RQNIY:5\>mpFdc0K~%$*eU*8ݷdC~ס_z wl-#C ׭ /S,|+NB(iʔn;Ϯخecx;&F~\`=4x2Ϋ)9tؑ?lB7Өr{cM=e!uX}^k8.jpӖ\(#gx$+SLGnWy ւ'`L,S0B2XfڇHJ^=4 b>JR9CAZ |"Y@5 7@#B .#ìS撄ԯL߼[AbƟl(Y!}maQ(!s DB.O0(aő\nI=Ѣ %)AV$,gر($j]%'KÂf4'YT<,wBVO&["ǡF lu]4#Uc_n8]oCT69ڧK8kyG8yhc#5,<&c-N˟U|FD#\v7(,=ߥP;y{6|ؼr܋z}z2HwasY[r d[:7|O9b,za3/Uߨ1j`~5){iG JFڄU5"hjlZ]M@V؝X) )͊eJ:qQ$F I/Œ{>kF{B"*/F#"}sZ"_*NMbi\*o埫ῼ0<)--0J|y=9^x2g0'٠Pc-JȬnpVe))7iƙg Gagk.K}휱]bMPr"6ƓW0u25l9y n?X*ol3Ν"F݉Lܚ<֣֗obNsBBaKYE2mV2Rid9l5sF(qMVBdBb9.Wp]NM*t&rB9*o+)βlL=>ҝR |_Mמ,ׇp"ݔǥDnlڑL% GA ` usp*d|O=Q^{x[2Ǒ1iPCu~%1:"(-;%ɿ5'C<\PPɁu'Ǚ게Tqq7XH_ˣ} ) ݍPJNq3tB֘d2+^Ews$n}O"u X8dfϳfv .Ai_vm\S$yo{gTRՖP>Y&oOcI<+Wn`ȧ+6ۋL"5VO.)VS`^{wZDzg #槄yZ`a@V:4 +FW;5`(ybS.OoM3n_hAeOU3FR]h5b6;->O50Nv <~z]iJv+ٿ~?̎ D>~Yެ{|&gU}mWtKmg"{Dl{MIAugnv/vZ yTꩶ\P/;I$#_Β!=rH>L@j\!y2~=п^411edD(p tf&haR'e3{!A/^!-#:`fdf'Fک MRxQ¹C =vw ~.p8B Y6naz9G A>/+/`q~ aa'?FM< w2ٟϽGX($l2 sEcl mzbS=L kKɤ mJ}<:,SjQ+9<OGc]ȣQ-~E{%4\l*.,83Dh+(iExD^׭Ѐ X/C7X(!WtRO5m[N?=Q_ޜ쀬%+?3/k㔐 `_ WPЌALn_{$8(/°X-`S<6ipHm$P*/wAlB!:1B9G 1!_\4 tz2pALyR2z<[lj >W7ףSyn+ʌ `Td^ʶzVÏ F+nA0NzlΣ@?/!vHz>wbZrdf( qLv8Hhk1cJ( ,Jb\PnZFڶoJ'kAv(›Όh'02hb( ,CwHPW;wM]%o i9LE,S㩷fILCzv3XջhH ~NnPL DQ?nzҷh !>2 FeAJ `QCbeKf`{5"gBU=H@C؋ i|=4K=_xPfMF"5X1)akMJyP" 0892I-%:5R$SnǓa.޵Cݶ3pH1O v$ -]<x@캺 F 4P f@~ sF \yLz+[g v( pbCNjpS@8EQ`[$wA2Ѫ' )]BHdyENEԗV4rSȿcmWvيe'qW:nȞ,<99OAπt8;:UFoZG{-z\~R$0fzIz#%MO^o+XX}0keS'!HF.{3߃nnI:#۝nzih8(mڀ{B 5s$n8|g2r̉bP{(DiaqYFbwvpԴs5t8؁ f.l+XxS<0eaJh]h=jD3Μ:d(s Z3-KiNƦ% r4.(Ҫ=?n7lMw@峸!ѻ@!^D(Frᄃ"; #g[OECtr@,'b@_1L^x) D "ֺ NDp^=|ƀ(u&븰:{!SpmZ՛D<L2%s|} [.Q::foV?{N81o 3;bU7|)k'"Xaqellc$˸9 T%%/Dչ Xh^t&,Y)EGсuf# 7wG_]\r+ aۼ;Mfc|LW`Մ{&np?SCpZƽ]/0c九ICmh+wIOKr060M 1{19';Jw:y2c q I=m@$P( V3p>y92gcn)]\U%ʑAPSyz,Mc,h~!IcGn0ÂQ_Ntu`)̎:mOoaݘ \n˅7I >p8Q.6ʅtס;.jS\RuB2qB3bl@`'Kc\h*bj CY(vDZNWR,{, -` (A ;GZ\-|mNZ&S6le;f#]٥۵ 2.w!mYrc-*l6~ﶵע$R.sRtQutFЮ]ЎA+9&8B'=]d*s-Pz+8QL%}ü([S ܓtTA0͋;I0b4͠C4&0 l:"bo[츻 nl͸Do>NE;_ .GDX  35BkM/bzBvYzaϿ) D1`"Qhh7tI+Na l) ؅v$a lsˆ>8N-GLyųFd9Su뙚`%FǕpizj w UE~0&_R ./֬_<#| &*J8Y1%M@B ; I*A\G@VSD> Kʘ߃ U޴Nm_Y#>)uM]<.0`#V2vGLaVyڳoBatWQnK.n%p0 Kc]%6sӲ! w6VT*Լcb9 {~h+!f{†{]+q4!︟1<ାLL+xH4L| w/1ۆ&w^/B62|fa_/ ξ2œ{'٤DVlshs_`2 zv W|yc,?F!sQ 555e [d]k5.s[6We VJ/ÜE6hV ZVCZߠmz_`"`̀IBm.$ іBĿ(SE ct= p^A*'GKŒVSw .{ps^h qaR-BhʣKd; #IQ},vqIaAk~Bq\B\0CY 酎rzXkr{Zߞֽv]\"1WW!~~%k""avcZ8Hmh9^`Ŝu`k"je^PrL SKCgk!G&6>@0)qj! 8Jh3%b 5 V+M̄ ( ׌fU0FR6eQ[0ea#Ӻ|]Q= D[oPYP =5VgQiMɨ1gЪ, .Y!6j qt] 'nuKQJ?) Kp۸ݛ! ~ƜM8>'ld<">kzFUf-Xqytz*7b"`Rd*.7u߯Tpg(NHlf Ν1ٸ$ТNpFқ(i@֤:L*xn/X%!̿Bkp,RDQԔL32_`XaЏq%/Xl]rc5:Gq<v)aT)*OV,_RP5Q~`؎ @lz=9FG>Yj{׎5~W2i` 8G'RE<^*~NVŴR?]g@k7q۬n9&w{Al0_"TJTHMA6$f)(=IpWp+gUN & !kF,C؁ Qʸc@iȸטF+@yOvuo@u^AU_6>%Jչ <> еs #r$w0y(hvtx<"inp%Q*hSVq<>7[# Wa+['Xofܭ<=FZ]7bP aB^!ά^@SM^p<>" <ۧc⭚M&|0D,m;?}ҿjuBCj S ^ rIK050H )2@"а)1ÉfPwJ%+C/R7u[g $(O4v`yY!n;:p_yI]p#xCOjq%Bb [|zG~lא_k*z$Tv.CnTXj/!XC0)EC I,eya!9P]pRYc̩n u MFuh@qv\ҥFd,CkHq6>$X2}R/)(F7+ğ__O9|Tt5'줹Ʌ|*{23"V9nr0 שE)߭vSk/!ԡd`/5ߐőN~а.]jP @| f*2lt38J{yFT :b8KS `oIbJGʾK-~[3ׯ)&Ɋhvl,C/)]mvYAQULd ]lCj3"( $2.6ޣ'H,>xA+~ Ǣί:B>0@IP2ÆcEK>5Aj߬}EȺOhB=l YNw|_H`+~B?`cp ս$vZSb=x=IB+t%|5b9S4 {GȊ4eE9p*M0V^W>mLAߏݵ>a Nʟڙywu .*@! _ B7SMN+ |b?\c]S`n)ӷ5Y(+{y˞e!`+*z}h{W]}Y񻠼h JH ّ!wOC܈_K{!5["4lj`yC~Qx%N?YߠgRSqar.%| dShEYQ=SXe.X M"YӨܝ &@f_e#-~u>l&9x}#(NGWbV?uD&$Q^໫m-t_tؘ 1:šW8 O#u3G17Xաh)s nb}]PKoq6tS:Mnt7z-Ȁ@ uUʰǴzB .|zYthD}ĮI-REM{ Yqaot5i?.=N 5ۂk00'}&XkZ :f;\ v9"߇d0h+ Х"NiZl@xH h5{}Anmt3մOdmd_ ] p85!~%51[|oAg/}=e˥i3 iA! !<Ǿ/ 8GXs&J)7f{*[-BȮX!#)v* XioH_UelɃM&ԵOp YCժ@ie] ŀ7 9 H8\SvR3qJh,؂+b> rKKyH]x5czf%TpD| ]^ BryUgUu_iU DUYhGS9KL}:Dky]O}8}</Ȭ'Z.+ q= ^&?{7*[y9{Yo\- ]%/=re꘨]έ߾(st~bO^C$䍞z<ÉFpKzb5.})S:Yr-(2*IeMUg@4}ukڟ Td_]C]c|eEobBɆ{qp'w *M5 Β݉0[*% R5麼1_ XJhZb"(`a7< 0S7"B8&n?<|ϻ n\DzJh\8V9~B 3_bm(E=H( C@g%f.*`!ZsU4-z\tȤH V'qu^gAbz -\N "McSu))/Q$+f*KHhhϡ`[t1`.䦽$݆ZãXݙoG0#dHm(;'N-Ӧ?r/Fr"KhF͸#@=$r-q5߮t\/"Żm taۄ#M)&ozȽ+ n1kU#(i$̄_ֳFQ<iuhf]U=@C[6MV5I<}>E+*-Dx.`)KV~73WP| $~3͜ ]ME4Vf}g߉ Ck@K+dOvMbjMWmg24Vhx)S:^L=wAi5ipXTZEykQWgFd]:/^9olIg~RS@Zꔧ0r4d| m1n 6K(s\aV~) rk/)Ƒ9+pV<@iFr3Cj"̍r. &!h`S\@}$v#NU#ŭkN:|: g5kah2u`T3_-o9!]F"SY;֢Pg Oϕ̀&ɋc6ϏZ ѩs ȸDN}~DgGe-Řn)9cXv!d}vdN,XPi//Z(0s]Rd8Ie0lcuz+‰ϻtRhJ$lUƒ!oPogvß }:!-mdʉ+_oh&:}OZexփ@aK?vF $¯\[%3w+1d "%;R/$Fzk';m2<ޕ)Vs25FKA[r.Z4O7{)z7D,Ȃ*W 1QxqܰVjG} $nPs@2H PA@N UeSQДF}᪴s^_ƜJ׽7I8GX-XTc% 0durCm#ߕJ7KW]Cx:iKXI@rPI{G5wÖJj qAdU#)yg 3TwiI.}ndi/͊HMg;!TN7.EEGZ2J'},]ߋFBB܅ o;**'#Ϊ|l敗\Gh/]Yï$+:BTu{is,)ԀI\/ CH'i#%OFru鸓Erv`g Tױi:)&UooD6zNB){ "N(K`h`&+:{Ia0&xq Wˌ7+dl3g͋$ >A ]nk1E2ӲZݫW+C"q0ؿ$4ӠKɐ9~ꦶitp_(Ţlgso1,g)Xtr]&U3A#]b0,ko,3X!e{hMU_T()jA0)xXR;gxIX_^RK -%G;/adk& }DLƎ]._vڼE'VqE~HzO;脂Z ?yf2` v27G=4 Jo-*1)//)u1sβPGSx߄xϤjUM0s5{Vz~ i5p3'{ax5 U!*_ hjණl(#0K?#p:Iںiix$fݍ@TQQTÖvB)A4R?_=B1I@8%Ak^APOJ#ijp(d݈aznGN$#L跻hVqpu_ٰf$PE=!HqĊj5h.pR1w.|LGc P{|A|+sAw#}v 9_ $B|[^M!7;h](>DЏywkD)n2.%Kq%@ JPC+UK;YrvqQ+Oj^ؔ7kTO8 ozl`mj_DU`+n-gt#)oi ӎJ2Mu?nzsyydqoK:9p4]D&MG6 Q)A(1yf٪{6*ӒˆҕVwA)\ۻɕy7OQځ(Ë+55ڱ|{NHR4[|S8߻=#Ԙ$]CTL/gf_8=+}f~X^n]:@x_>b2^foOcwnOhi[…<0m|Lu [ 5O(4jén$w$zV_ns?5CZE?oBhLBӆ-3v~ 2z8:rS?#vB6v/L*Eu&6)C 6p)eOu;O9ufN h΃@3rhju)蓜&E ϧ.i#b,,Aҁ*pz?`))6R{^0w6F4ʮ-Nڟͽm)ͣ~yPyZǵū9VP3YV&v$r ~kGFB'5!6Q+i4mvfa*N6X9Tj 'bfJ&.GX̖0)7[?g]6תoő> {fSN77?Adz2`Fsc͸h!zT0QLy;Ip\AZ108B 1G.9 bVaTDVfVu@01A[=)4J[ώ5\O(Α3p"V^̩ʊՄV7?'?$wS$tKӇҧi;E^0 qU|Dry_lC2YY2.)秒\Dv7pLek]sDTåW?o pEOm7 Ep)&Oe x.Tt{IFxc16<闞C**d&N3ױK?1yH~]_ Eۧ6Fj .J[S@DIso~^hn+!ըDu gdEOΕ5_WG lيJ&j}#t=H]ͅ7!Gyl`_vDJ`0_ yS^P| [%>%'[3_6fiV/áXw4R ;%"wɩI^UP@0/ng+#F0:fw]bxs욏n[o<夦mRfJ֘pM5f_B}b],Fǂ6-zS7v_J`ÛN7ns|w!e8c3WK _a#L_N2 {i b/*ؑe8P.:ۛ Q 8 Zi@[(`yn$1WFDwtGL}? Q!giۨ;4DTͣ7 SU0 'Q/_AV"T&wWXJ'&i;'쮺t/) T,I)uz2#X}d)#1޲z^׋m,QxZP< CEGCȄ{P RߚBB E Z46e!l?QaONlmu~n( ,DdP8 ׳ķ?R4.9qQnhCI,fAŖ_==Mu43 N1}*L"ӞX8OU4u 0+bMqGrۍJT P ot kQ+1} #OP9t.p6} $bаj[8F|]:]ɯ@c ?ܵA7*@P8>טRv}Y~>eQDU>XdABfc*!pjb_wp>vT3AJ0Vrѫ.|c(a\6,zju޶cLc;zGC7^Z :2\8pdE"f+qQOwv?SxVsŷ5xۡ5gPh.cNlaϰU|qgSu+͞L!=^'8R)',68Shъ!`wb؈[22jDKsm\03ֆ*QN^v Ly7cY}5ě^j)_?kY,0&%P}Wx}< {|r[ԛTFK*vgYv9:&+|_ R!h";[ C |@EBW[d7C\5O,P縣~SM|5H%)DY!g }. 31>DfQU!e1I+ACW-m^ϹFݡzR~:ݭAN|{c=Sd8\@+ڵA^ܠd<+%N0zT38FrRCZH/H䯶oi UJރ^\ 5t–೜w]TVlm]Eb / AWo lf04dY5wyl颩֧${5XX#MshK>bfbn쏛l}Vf: p4u/.V,BolwaZ5MD-\JUY4NT d^~;efSBq%:DҺ /' ^[~ f;YP7*8ɓ (>ͪe ͣS(+ 6w}<:?[`--3UXښ?Yɇ # ~vHq3WVE,IU:]\O`t녛fSA7Isp,8\ff?Dp8tgԬU '((u{%NYMv:@F)-nB{FWP$0)Af Y~\U|cJ\P ;h{:b6kW? v-ώ)V((LorK1fT̪n%.AD ' ph Ui#*uKVƯ'\VJ =`ϗ&[rpK{D5gVWiN j̆ijЀ"HQ/FҐ6}o,CD#N= 㗍[pfRQ5Hxz~'﹒s`Qۺ":\},}jArcyuXK G˔2R#S.@r3D #Rۉ!_ѷcwTnPn\K^Nm/yM+ η-/u䄢' wJ+{أɄ U:ؙ͎8,EϤ#7KP?}`fL\ }YM\Fzx-{66P0ad~96*`*NoipĀ*ꁜ%K/ZKH).K!YeԲA㌜m.Jp$-#+[ޖؗݏ2-j$<Z*;wsX5nV!U_mN3{k>u|CPWZCX?te!=THf8Xȓp %wW5^RY<`x[AǘRP]9dNN1UB4W]Dֿ$k_̊oX ӱF"R!@`S#^zRzi崄KyIR4efԒ0H~x|sF'^',?krs tcǩdm!q/OH#v3 M`2_)O@)d&'zhSa.^)3%}'rXq͡x#\]DL3,"]H!MzQ_vBT5ɽR=%Fq3}*lPdikG-)?GgzNX9 ,)7)l^-5Aij8 ZO'] g9>X@ƺgJ9:.6ϸ>¤sEZ,f4lnS/g>l)rdt l`F7ʊUrF]HVx$I(3#G>[7zYD3WDҝ o 7]}79Viړ0i݈79@ϔZxt Řމ䂿 oo3DFYr{ Oa{B7HsB} >ǫ8?_3Z`Ԉ;B)ea7] p[+YF`V?;|E%E ,Q”e#l>Rj^WsQl|L 82`buq6@:')=;ur33/_Y-Cʂ)?,)d* r(#_< 7|9~[] =gq@C./Jwb7IZZkH_IET}hq!+)(lftPJ@4p$!vApQ}!5"_WvcBjc :u_Ĕp@.w{%]PMx -s=;MO[bti0 {35->%*t57d* <>Uݯ#^h- y0Hē&MVrvfSέ83GUtpHxN2p>orDPhy׏8}.?-z~+h#Eg. $Ir}knS (SVĬzGBF3fAX6µ쾀N~kW%iy{.@ݬ5FS36>ttOfDC5nВzu6'tPJ [6\ 5*'ypAˠ jSR?.bݠOx" AЀ ;@@$(*g<|</%'~ QFch=X^؛ nѿMGa3eɈah|[db8Bd"w9.8)=dr2 Q3ҬgzhSV/ݷT#}:7D 6=PNIZbD aN) ו˫`Ant moZT! ` h!zm7x"T9 4ɍK$!Q}g(%rχ:P6ߴy*ó~S6b XN47 /)Aܛ 3 M.`T<@lNpq"{$w|e`D'W>`[A^d l-w }(^֨$/YY U7joLhJeܐƿo'-|w1*TI< W%_VϘiϏc[~"vb`5%q\fd:dB{ ]q5- *x.ǝ&7RhG6Y=[?oםXd`N*t0-nl!PN[6};?*m*䖒kBmlS~Ui2ώiljØrf T {)ռ. ( RXǣv.}Gu_wJ{G"7N>C`i6Nb4}ma (^>%۲,d&"SeLn6sop4$ HqH{0KXWUOV=82 'A IzY[xFf^ g~u^iكM|AXp'Wbۓ,ю~}$e-'=Bրw2iׂ=16BDƿj@k.EDzBRIvfTA,7`(z$[sVDjv9c[/|ڄ\e.Lܚuqɤ}zYmΩ\XirBq+/ R]؀'Vy"*Fm1|EݥEbGE\ոL4m}{ H6xfųRNP^`9׎b &иD  `)T- =*L/s PT'=޿ pDJ!v{'Y%'ԃ́*SI˽^Zsr\>>>ǧ>o>(q >$k2Lk"u!ZH1 YB{,KF݈$3Nc}e`Q>=h)[1P뒁ȓlP| }fԂ; 8\h5Lsx~/cSβ8ӌה\\$kEqL` ١Lo#/1ja))mA9@& LH $=N"k6¶63<*v5T>EOgX?cq@MBv?VĚ/{1|q|/*#7l{Iq/>1o! |!`j罶y)uB\'y>$°7Eh3L73eSa8'ڟ9cڔR=5@E:EW.]Ϲ#R_RV(qCD$@,lڲ_uUzle3mR(숢J/B43|Û. M*4bVyj|›Og^$&!]U[1 `QO6ߩ]j)lEm%wzrb3CgS&CNE瞢vPU/P|>EZ.n؂iTܝ9(I8#rnuɟ9%023 wU;wTNro#KGH>=#F]q᪺kAa }NkݥJƓ+"YG8v_҄_P}z/0YMﲎ'6ezBҒ2Qa`$:kp/|4FTruYgM)ZuFwhMh+vcOEr@iw^dkWū\ x bĠ- d]Nhy"ڈM``t{fO4N+x! BWBܸ?(ԕ9Fz!oEHYѮ&u$WkK9F$c0貾\$:OQv] [VSNQ[ 2|q>Re!roWctG1OVC0EP˂H0I;=oSO6&Ҷb\kN(i Sx5 g!털D},f7z>P򣀛Uiw䕽@wY¯%EעRњUdDyp%}B| =#OC'S("C)1ToU7AYP@A,ՎBbOQGOH@fݔܬ,F=mH.MA8grb_"_>Ѣq5Riv_Nٓ%:;jG4rmI=,( &q^n8?o%yrGUݩۏ\(E<\uRÇϩ&{`±/B[>3ԧ|bLu6jۯloŶ@c嵡4Lc$6v*>|zo-N6[Sj7~ !]}7FZ( _/KOpt^(S ԻN뜥,lTBKټ$r``N-tӢPd\Nƀ`b;YH%+ol:xCp%2:4!'3D-I`|8W.ሤt Y GEkq-&h֢z3L҄ߝd9𨥑Kq!y4Xco3=ؤ- !0QrZUFGb(`48؇)R.,3eٲK7#Xah YLcjsIGl ˹Ҭ窶W0CbAXYC&_8kw`kDfɉ-cެ8IY՚UW+H5|֯#nLخg*gۼ,[o{ԫfNkaJ$N \Gd=? f瀛pW4FG60_iIQ:sd&ZnԒozk̢LՂf]d $'I$wߛX%W"¥B2Y0 {ah`OĿ$"yDEߞ汷4goP'M'BCwӾO'6ꢦRݐmreANo=Xt8=Kt'fm$O0:.&1첆&*pi4;0p琖}A/I ߡc`冻jӇkpq݌rHŗF&h .?rWElSICMOL`}1fӾi`״n?y E[2`LY} !S\llӗ-ҀEq=,^r:cǨ,4@>2[2+<tgͩ ǥ]?H*$S\m3K{tY@D,rY*;@wnXi`<"QX %mfjPa,Z.1<"$-)3f-ԎǻhI?o9}5EVa)ӥNxk%;<( ( E⒋POv$ -ni>{-7FsLOY읦83}ث%Lh^R n/tO fjm'9d<{[~LC8y]S[*EY-+@"+>r蔥xOKP^8[9cPh8Ecq|#o?,D~&J%FWkŦvԞc%EXJ(/v%- N^ `kxz̘ `t*ã]8o*1BBRG24Q}H`u٠>(_TίOcxJ3u߷dMrh豖 =?)9 ];PF4H7 Pf$heviV:pZ&e>RA.t|lGY"/y!NyfB+ Yط)d/wBr~X~eť7Lb ։dB޹{\Y}~li" FEuݜr9(Z T-Ųŗ㸲̦.H=12)?:Qo4jT+HX59uL43ߊꐂ6$?n%gCڟWt9oB^֣2Qd+'3]?4NtUCleRngZu8yyp!j߼o6#J7h$;N85|$^7)G#}ai>sS:%V\TfqP\S0/5*+24B}Ӳ}ա(2O!?Ɨ\XW@Ͱ-+OS({_2V-2Lu-)ؖvrg#L榼ِ 45z\dq'X=Ց1 e9x fA.ze{^FvedcS2.!UMq-/(^(if5_ ;-ylW[}ӓ |'OR8t# 3 vareYڨݑZ!n:k|4y^a}pt+NaSl<2d5SN]m%ECtHtF!}FG5AX1yE;<0t5_@T{+2bGg}Db%i=A/YTۖр>҉]FƉ ( &a]1h0}MOL" zAmq+ې8J5s2h5L>am:l)~ceZv0@POY\X/|nlR+ X=cp_]j6jÊ%;2BRUdD~Ѵ1 / d#YSunQ*ILj5S=o r`ZDݤŌn O aLmCtxqy~~#e[y@9_-pM,{)+ɭ qWr~n @LRVaG2gczP፣tLPt Ʊ2P }V{.ʷ|5Iw-g:it6$"*OkJҖh;SM@|h#+oZ%qn,m ϕ~0潡V{i-S*fGpۚgi̙3pิ\)A7YQ>_俚%3"sD叚e$˝!@G"Eӥt,$=ֻL^_Wn!b g +uM'W#T{M1gXQ"TQ%qܜu>wX`~ z`BJUX'GxHw>, bXk.zMA'eX.Ԙ k.׭ˇ ү32U췹=a>I*6KM:PhvYc2~ svm IU%@. _vJRs9F9D 1_ٚ W2K?mv*MP%WyѮB;ݠ``Fm”=gE|io>|LFo֖9"v;G$DjxrSk5ƻS=vK]KB\ 6$o\PMYsH+.QP첾57 Jם;nݛ*`rqw\)EMt&J&g[; idgEj{195Hi`@mNY|uGeq&1ďjf` B^ۺfݪfl7Ӝyӈ2E[LC:Yv%Af4 5 a{!ᮅGdQRV>kKq%߻mğ.'B,䛙v#9ߙ,YOm%ΘK{?X*3=!:?<yt^F'OijC-ܵߢV,4؛@9Vd g6+N>gJ#LWLR1Kmvv p dp릓x l y6Vĸd/[kqӁN9j^69ZQ[~Mn҅bwSё :a%,̰qhFL$,mixZ(۸D\lEo vJϓ/ P.]"uO^*\!%6op?> J!q;Gz=VMD7xZndϳBʋFG8j^xNZ_r^YYaAC20 N@l]5?I$f2x^8G'si/I#sU(𘀖RS{xjBZo~jf+cb3wwHV+4(xڪdu!Y+m$=HV_b)7 dMm};F\lN/ i [ QYXDB 1U7&S:g4Xɉ9Nx V(Ro]s W%?O3eq|w0 Պ#c敫ZN$:[wot)]VSLjiLHX҇*H>vusjgE8@%{SBDe ]ZϨՂ_ڿ(7E_oreNj6[> )D7/-4F5aАY+ԿF$ fK?wWOq37 10 -e0묞 ~]̞R P H<&RT;HJC4֤ S A-Fp8{3OkCS%zkE冖 g"c[SGJ]'?q:bdR?-MD䘘o:X +làQ'Đ_Z7Y So{_;%?*{A "lp"\[3t>>FGZP&JFm^[׉Ih3 NLa,f ?gI8ֱ7~^) & eVrUnkh+n7r=v/N/M~uKe?%qnBZhUwKfO0(뤿bm )UU3r(|h}`b`Uv-0 ީWTd?\^y1晃vLQ0 ES3&s&:Cѕz?QR^/=srڍCti$K[u֡c/F~_C(yh76|уdQ{Pgh[5mĝYw͸ksBlo|Yi&ك)uQ69M*v8#tӢqZ4BQ˓}XkW_%I1b)1];RY,{i䯄q:~-#@km\3RY $H96&#U*-zOS@3T}ҏqDHZMw,v d?u0 <0:ddwmv7^OPKN.%Guw-Qx{9q b֒hH\ZXZFIPR؁N#~} {8"5#) Њ2Ȥ(b5x?rUFimϊӁJpq ɱMt*Upe+,]** .ۼz4az?J[ZƏ܂}ډ]_]8=冚Z8x !RRv>1Ra BؘT;1:`Ő/)t:&f_ o=xO>[I" zXn`; ?Suidݛ"Q<~)rp+~2lT IYv־M)6!SK|\r ~jyf5^˿c:{|3 \V i17vV2%q[EnhmvDE:" }@Xlݺ"')[}n|՝12Rtka}#'x_{#8C' :hz)/ c)f5<\BD%4Q8f˅$IZLҞSK X]!BRv@Cղ1tP!rh2mD;(+%eHaf&~Ŀc19GzEQ;8|LAeY*߹qMve\ؓ0b@!R'us#?B$%]`K*Z<))z ~o]ImK;K4i'Kä^;Wv:cSyKOb|%*k`8vvO[$*Nsʌ[p"u.sH0Rɤb!?r&s}~UVk?ϓptM=`=DGZLbQKv 20, newname = col2, col3:col6)} (see examples). For data frames, the \code{subset} argument is also evaluated in a non-standard way. Thus next to vector of row-indices or logical vectors, it supports logical expressions of the form \code{col2 > 5 & col2 < col3} etc. (see examples). The data frame method is implemented in C, hence it is significantly faster than \code{\link{subset.data.frame}}. Note that the use of \code{\link{\%==\%}} to compare a single column to a single value can yield significant performance gains on large data. If fast data frame subsetting is required but no non-standard evaluation, the function \code{ss} is slightly simpler and faster. Factors may have empty levels after subsetting; unused levels are not automatically removed. See \code{\link{fdroplevels}} to drop all unused levels from a data frame. } \value{ An object similar to \code{.x/x} containing just the selected elements (for a vector), rows and columns (for a matrix or data frame). } \note{ No replacement method \code{fsubset<-} or \code{ss<-} is offered in \emph{collapse}. For efficient subset replacement (without copying) use \code{data.table::set}, which can also be used with data frames and tibbles. To search and replace certain elements without copying, and to efficiently copy elements / rows from an equally sized vector / data frame, see \code{\link{setv}}. } \seealso{ \code{\link{fselect}}, \code{\link{get_vars}}, \code{\link{ftransform}}, \link[=fast-data-manipulation]{Data Frame Manipulation}, \link[=collapse-documentation]{Collapse Overview} } \examples{ fsubset(airquality, Temp > 90, Ozone, Temp) fsubset(airquality, Temp > 90, OZ = Ozone, Temp) # With renaming fsubset(airquality, Day == 1, -Temp) fsubset(airquality, Day == 1, -(Day:Temp)) fsubset(airquality, Day == 1, Ozone:Wind) fsubset(airquality, Day == 1 & !is.na(Ozone), Ozone:Wind, Month) fsubset(airquality, Day \%==\% 1, -Temp) # Faster for big data, as \%==\% directly returns indices ss(airquality, 1:10, 2:3) # Significantly faster than airquality[1:10, 2:3] fsubset(airquality, 1:10, 2:3) # This is possible but not advised } \keyword{manip} collapse/man/recode-replace.Rd0000644000176200001440000001725414170154174016005 0ustar liggesusers\name{recode-replace} \alias{AA1-recode-replace} \alias{recode-replace} \alias{recode_num} \alias{recode_char} \alias{replace_NA} \alias{replace_Inf} \alias{replace_outliers} \title{ Recode and Replace Values in Matrix-Like Objects } \description{ A small suite of functions to efficiently perform common recoding and replacing tasks in matrix-like objects (vectors, matrices, arrays, data frames, lists of atomic objects): \itemize{ \item \code{recode_num} and \code{recode_char} can be used to efficiently recode multiple numeric or character values, respectively. The syntax is inspired by \code{dplyr::recode}, but the functionality is enhanced in the following respects: (1) they are faster than \code{dplyr::recode}, (2) when passed a data frame / list, all appropriately typed columns will be recoded. (3) They preserve the attributes of the data object and of columns in a data frame / list, and (4) \code{recode_char} also supports regular expression matching using \code{\link{grepl}}. \item \code{replace_NA} efficiently replaces \code{NA/NaN} with a value (default is \code{0L}). data can be multi-typed, in which case appropriate columns can be selected through the \code{cols} argument. For numeric data a more versatile alternative is provided by \code{data.table::nafill} and \code{data.table::setnafill}. \item \code{replace_Inf} replaces \code{Inf/-Inf} (or optionally \code{NaN/Inf/-Inf}) with a value (default is \code{NA}). \code{replace_Inf} skips non-numeric columns in a data frame. \item \code{replace_outliers} replaces values falling outside a 1- or 2-sided numeric threshold or outside a certain number of standard deviations with a value (default is \code{NA}). \code{replace_outliers} skips non-numeric columns in a data frame. } } \usage{ recode_num(X, \dots, default = NULL, missing = NULL, set = FALSE) recode_char(X, \dots, default = NULL, missing = NULL, regex = FALSE, ignore.case = FALSE, fixed = FALSE, set = FALSE) replace_NA(X, value = 0L, cols = NULL, set = FALSE) replace_Inf(X, value = NA, replace.nan = FALSE) replace_outliers(X, limits, value = NA, single.limit = c("SDs", "min", "max", "overall_SDs")) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{X}{a vector, matrix, array, data frame or list of atomic objects.} \item{\dots}{comma-separated recode arguments of the form: \code{value = replacement, `2` = 0, Secondary = "SEC"} etc.. \code{recode_char} with \code{regex = TRUE} also supports regular expressions i.e. \code{`^S|D$` = "STD"} etc.} \item{default}{optional argument to specify a scalar value to replace non-matched elements with.} \item{missing}{optional argument to specify a scalar value to replace missing elements with. \emph{Note} that to increase efficiency this is done before the rest of the recoding i.e. the recoding is performed on data where missing values are filled!} \item{set}{logical. \code{TRUE} does (some) replacements by reference (i.e. in-place modification of the data). For \code{replace_NA} this feature is mature, and the result will be returned invisibly. For \code{recode_num} and \code{recode_char}, replacement by reference is still partial, so you need to assign the result to an object to materialize all changes. } \item{regex}{logical. If \code{TRUE}, all recode-argument names are (sequentially) passed to \code{\link{grepl}} as a pattern to search \code{X}. All matches are replaced. \emph{Note} that \code{NA}'s are also matched as strings by \code{grepl}. } \item{value}{a single (scalar) value to replace matching elements with.} \item{cols}{select columns to replace missing values in using a function, column names, indices or logical vector.} \item{replace.nan}{logical. \code{TRUE} replaces \code{NaN/Inf/-Inf}. \code{FALSE} (default) replaces only \code{Inf/-Inf}.} \item{limits}{either a vector of two-numeric values \code{c(minval, maxval)} constituting a two-sided outlier threshold, or a single numeric value constituting either a factor of standard deviations (default), or the minimum or maximum of a one-sided outlier threshold. See also \code{single.limit}.} \item{single.limit}{a character or integer (argument only applies if \code{length(limits) == 1}): \itemize{ \item \code{1 - "SDs"} specifies that \code{limits} will be interpreted as a (two-sided) threshold in column standard-deviations on standardized data. The underlying code is equivalent to \code{X[abs(fscale(X)) > limits] <- value} but faster. Since \code{fscale} is S3 generic with methods for \code{grouped_df}, \code{pseries} and \code{pdata.frame}, the standardizing will be grouped if such objects are passed (i.e. the outlier threshold is then measured in within-group standard deviations). \item \code{2 - "min"} specifies that \code{limits} will be interpreted as a (one-sided) minimum threshold. The underlying code is equivalent to \code{X[X < limits] <- value}. \item \code{3 - "max"} specifies that \code{limits} will be interpreted as a (one-sided) maximum threshold. The underlying code is equivalent to \code{X[X > limits] <- value}. \item \code{4 - "overall_SDs"} is equivalent to "SDs" but ignores groups when a \code{grouped_df}, \code{pseries} or \code{pdata.frame} is passed (i.e. standardizing and determination of outliers is by the overall column standard deviation). } } \item{ignore.case, fixed}{logical. Passed to \code{\link{grepl}} and only applicable if \code{regex = TRUE}.} } % \details{ %% ~~ If necessary, more details than the description above ~~ % } % \value{ %% ~Describe the value returned %% If it is a LIST, use %% \item{comp1 }{Description of 'comp1'} %% \item{comp2 }{Description of 'comp2'} %% \dots % } % \references{ %% ~put references to the literature/web site here ~ % } % \author{ %% ~~who you are~~ % } \note{ These functions are not generic and do not offer support for factors or date(-time) objects. see \code{dplyr::recode_factor}, \emph{forcats} and other appropriate packages for dealing with these classes. Simple replacing tasks on a vector can also effectively be handled by, \code{\link{setv}} / \code{\link{copyv}}. Fast vectorized switches are offered by package \emph{kit} (functions \code{iif}, \code{nif}, \code{vswitch}, \code{nswitch}) as well as \code{data.table::fcase} and \code{data.table::fifelse}. } %% ~Make other sections like Warning with \section{Warning }{\dots.} ~ \seealso{ \code{\link{pad}}, \link[=efficient-programming]{Efficient Programming}, \link[=collapse-documentation]{Collapse Overview} } \examples{ recode_char(c("a","b","c"), a = "b", b = "c") recode_char(month.name, ber = NA, regex = TRUE) mtcr <- recode_num(mtcars, `0` = 2, `4` = Inf, `1` = NaN) replace_Inf(mtcr) replace_Inf(mtcr, replace.nan = TRUE) replace_outliers(mtcars, c(2, 100)) # Replace all values below 2 and above 100 w. NA replace_outliers(mtcars, 2, single.limit = "min") # Replace all value smaller than 2 with NA replace_outliers(mtcars, 100, single.limit = "max") # Replace all value larger than 100 with NA replace_outliers(mtcars, 2) # Replace all values above or below 2 column- # standard-deviations from the column-mean w. NA replace_outliers(fgroup_by(iris, Species), 2) # Passing a grouped_df, pseries or pdata.frame # allows to remove outliers according to # in-group standard-deviation. see ?fscale } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{manip} % __ONLY ONE__ keyword per line % use one of RShowDoc("KEYWORDS") \keyword{documentation} collapse/man/collapse-package.Rd0000644000176200001440000007255314172556012016330 0ustar liggesusers\name{collapse-package} \alias{collapse-package} \alias{collapse} \docType{package} \title{ % \code{collapse:} Advanced and Fast Data Transformation } \description{ \emph{collapse} is a C/C++ based package for data transformation and statistical computing in R. It's aims are: \itemize{ \item To facilitate complex data transformation, exploration and computing tasks in R. \item To help make R code fast, flexible, parsimonious and programmer friendly. % \emph{collapse} is a fast %to facilitate (advanced) data manipulation in R % To achieve the latter, % collapse provides a broad set.. -> Nah, it's not a misc package } It is made compatible with \emph{dplyr}, \emph{data.table}, \emph{sf} and the \emph{plm} approach to panel data, and non-destructively handles other classes such as \emph{xts}. } \section{Getting Started}{ See \link[=collapse-documentation]{Collapse Documentation & Overview} (the most up-to-date documentation for \emph{collapse} 1.7), or read the \href{https://sebkrantz.github.io/collapse/articles/collapse_intro.html}{introductory vignette}. All vignettes can be accessed on the \href{https://sebkrantz.github.io/collapse/}{package website}. A cheatsheet is available at \href{https://raw.githubusercontent.com/SebKrantz/cheatsheets/master/collapse.pdf}{here}. A compact introduction for quick-starters is provided in the examples section below. } % \section{Key Features} { % \cr % \bold{Key Features:} % (In more detail in \link[=collapse-documentation]{Collapse Overview}) % Key functionality: % Key areas Key topics addressed by \emph{collapse} are: % where \emph{collapse} offers innovative solutions are: % \tabular{lll}{ % \Sexpr{"\u200B"} \Sexpr{"\u200B"} \tab \emph{ Description } \cr % \enumerate{ % (1) \tab\tab \dots \cr % \item \emph{Advanced data programming}: A full set of fast statistical functions % supporting grouped and weighted computations on vectors, matrices and % data frames. Fast (ordered) and programmable grouping, factor % generation, manipulation of data frames and data object conversions. % select, subset, transform, replace, add and delete data frame columns. % \item \emph{Advanced aggregation}: Fast and easy multi-data-type, multi-function, % weighted, parallelized and fully customized data aggregation. % \item \emph{Advanced transformations}: Fast (grouped, weighted) replacing and % sweeping out of statistics, scaling / standardizing, centering (i.e. % between and within transformations), higher-dimensional centering % (i.e. multiple fixed effects transformations), linear % prediction and partialling-out. % \item \emph{Advanced time-computations}: Fast (sequences of) lags / leads, and % (lagged / leaded, iterated, quasi-, log-) differences and growth % rates on (unordered) time series and panel data. Multivariate auto, % partial and cross-correlation functions for panel data. % Panel data to (ts-)array conversions. % \item \emph{List processing}: (Recursive) list search / identification, extraction / % subsetting, data-apply, and generalized row-binding / unlisting in 2D. % \item \emph{Advanced data exploration}: Fast (grouped, weighted, panel-decomposed) % summary statistics for complex multilevel / panel data. % } % } % } \section{Details}{ % Put this below bullet points ?? % \emph{collapse} provides a carefully conceived % \emph{collapse} provides a compact set of functions % organized roughly into several topics \emph{collapse} provides an integrated suite of statistical and data manipulation functions. These improve, complement and extend the capabilities of base R and packages like \emph{dplyr}, \emph{data.table}, \emph{plm}, \emph{matrixStats}, \emph{Rfast} etc.. Key Highlights: \itemize{ \item Fast C/C++ based (grouped, weighted) computations embedded in highly optimized R code. \item More complex statistical, time series / panel data and recursive (list-processing) operations. \item A flexible and generic approach supporting and preserving many R objects. % standard % (S3 generic statistical functions, class/attribute preservation). % , compatibility with \emph{dplyr}, \emph{plm} and \emph{data.table} \item Optimized programming in standard and non-standard evaluation. } % To explain this a bit: The statistical functions in \emph{collapse} are S3 generic with core methods for vectors, matrices and data frames, and internally support grouped and weighted computations carried out in C/C++. %Thus functions need only be called once for column-wise and/or grouped computations, providing a lot of extra speed and full support for sampling weights. R code is strongly optimized and inputs are swiftly passed to compiled C/C++ code, %, with further checks run at that level. where various efficient algorithms are implemented. %This approach enables flexible and parsimonious programming and data manipulation in R at high speeds. %when passed to a \emph{collapse} statistical function together with a suitable data object % To provide extra speed and programmability To facilitate efficient programming, core S3 methods, grouping and ordering functionality and some C-level helper functions can be accessed by the user. %For example \code{GRP()} creates grouping objects directly passed to C++ by statistical functions. \code{fgroup_by()} attaches these objects to a data frame, yielding efficient chained calls when combined with \emph{magrittr} pipes, \link[=fast-data-manipulation]{fast manipulation functions} and \link[=fast-statistical-functions]{fast statistical functions}. %Performance gains are also realized when grouping with factors, or computing on grouped (\emph{dplyr}) or panel data (\emph{plm}) frames. %Hence \emph{collapse} enables optimized programming and data manipulation in both standard and non-standard evaluation. %The function \code{fgroup_by} can be used to efficiently create a grouped tibble inside dplyr-like chained calls (constructed with \emph{magrittr} pipes, fast manipulation functions like \code{fselect}, \code{fsubset}, \code{ftransform} and fast statistical functions). Thus \emph{collapse} enables optimized programming in both standard and non-standard evaluation. % attributes of atomic objects are preserved if the dimensions are unaltered by the computation, and data frame attributes are always preserved, Additional (hidden) S3 methods and C-level features enable broad based compatibility with \emph{dplyr} (grouped tibble), \emph{data.table}, \emph{sf} and \emph{plm} panel data classes. Functions and core methods also seek to preserve object attributes (including column attributes such as variable labels), ensuring flexibility and effective workflows with a very broad range of R objects (including most time-series classes). Missing values are efficiently skipped at C/C++ level. The package default is \code{na.rm = TRUE}, whereas \code{na.rm = FALSE} also yields efficient checking and early termination. Missing weights are supported. Core functionality and all statistical functions / computations are tested with 13,000 unit tests for Base R equivalence, exempting some improvements (e.g. \code{fsum(NA, na.rm = TRUE)} evaluates to \code{NA}, not 0, similarly for \code{fmin} and \code{fmax}; no \code{NaN} values are generated from computations involving \code{NA} values). Generic functions provide some \link[=collapse-options]{security} against silent swallowing of arguments. %Hence they also handle various date and time series classes etc., and can easily be integreated into most approaches to data transformation. %A global \code{option("collapse_unused_arg_action")} can be set to regulate the behavior when unused arguments are passed to a generic function. The default is to issue a warning. \emph{collapse} installs with a built-in hierarchical \link[=collapse-documentation]{documentation} facilitating the use of the package. The \href{https://sebkrantz.github.io/collapse/articles/index.html}{vignettes} are complimentary and also follow a more structured approach. % and \code{base/stats}. % extra methods warrant, provide ? %Apart from general performance considerations, \emph{collapse} excels at applications involving fast panel data transformations and techniques, fast weighted computations (e.g. weighted aggregation), fast programming and aggregation with categorical and mixed-type data, fast programming with (multivariate) time series, and programming with lists of data objects. %Other broad areas are fast grouped and weighted programming to implement new statistical techniques and packages, and fast data manipulation code (i.e. server-side for \code{shiny} apps). % The package largely avoids non-standard evaluation and exports core methods for maximum programmability. % Most are S3 generic with methods for common \code{R} objects (vectors, matrices, data frames, \dots) % high computation %(aggregation and transformations ~10x \emph{data.table} on data <1 Mio obs.). % Beyond speed, flexibility and parsimony in coding, a central objective of \emph{collapse} is to facilitate advanced / complex operations on data. The package is coded both in C and C++ and built with \emph{Rcpp}, but also uses C/C++ functions from \emph{data.table} (grouping, ordering, subsetting, row-binding), \emph{kit} (hash-based grouping), \emph{fixest} (centering on multiple factors), \emph{weights} (weighted pairwise correlations), \emph{stats} (ACF and PACF) and \emph{RcppArmadillo / RcppEigen} (fast linear fitting methods). For the moment \emph{collapse} does not utilize low-level parallelism (such as OpenMP). % \emph{collapse} is built with \code{Rcpp} and imports \code{C} functions from \emph{data.table}, \emph{lfe} and \emph{stats}. %, and uses \code{ggplot2} visualizations. } \section{Author(s)}{ % \author{ \bold{Maintainer}: Sebastian Krantz \email{sebastian.krantz@graduateinstitute.ch} Other contributors from packages \emph{collapse} utilizes: \itemize{ \item Matt Dowle, Arun Srinivasan and contributors worldwide (\emph{data.table}) \item Dirk Eddelbuettel and contributors worldwide (\emph{Rcpp}, \emph{RcppArmadillo}, \emph{RcppEigen}) \item Morgan Jacob (\emph{kit}) \item Laurent Berge (\emph{fixest}) \item Josh Pasek (\emph{weights}) \item R Core Team and contributors worldwide (\emph{stats}) } I thank Ralf Stubner, Joseph Wood and Dirk Eddelbuettel and a host of other quant people from diverse fields for helpful answers on Stackoverflow, Joris Meys for encouraging me and helping to set up the \href{https://github.com/SebKrantz/collapse}{Github repository} for \emph{collapse}, Matthieu Stigler, Patrice Kiener, Zhiyi Xu, Kevin Tappe and Grant McDermott for feature requests and helpful suggestions. } \section{Developing / Bug Reporting}{ \itemize{ \item If you are interested in extending or optimizing this package, see the source code at \url{https://github.com/SebKrantz/collapse/tree/master}, fork and send pull-requests to the 'development' branch of the repository, or e-mail me. \item Please report issues at \url{https://github.com/SebKrantz/collapse/issues}. } } %\references{ % This optional section can contain literature or other references for % background information. %} %\seealso{ % Optional links to other man pages %} \examples{ ## Let's start with some statistical programming v <- iris$Sepal.Length d <- num_vars(iris) # Saving numeric variables f <- iris$Species # Factor # Simple statistics fmean(v) # vector fmean(qM(d)) # matrix (qM is a faster as.matrix) fmean(d) # data.frame # Preserving data structure fmean(qM(d), drop = FALSE) # Still a matrix fmean(d, drop = FALSE) # Still a data.frame # Weighted statistics, supported by most functions... w <- abs(rnorm(fnrow(iris))) fmean(d, w = w) # Grouped statistics... fmean(d, f) # Groupwise-weighted statistics... fmean(d, f, w) # Simple Transformations... head(fmode(d, TRA = "replace")) # Replacing values with the mode head(fmedian(d, TRA = "-")) # Subtracting the median head(fsum(d, TRA = "\%")) # Computing percentages head(fsd(d, TRA = "/")) # Dividing by the standard-deviation (scaling), etc... # Weighted Transformations... head(fnth(d, 0.75, w = w, TRA = "replace")) # Replacing by the weighted 3rd quartile # Grouped Transformations... head(fvar(d, f, TRA = "replace")) # Replacing values with the group variance head(fsd(d, f, TRA = "/")) # Grouped scaling head(fmin(d, f, TRA = "-")) # Setting the minimum value in each species to 0 head(fsum(d, f, TRA = "/")) # Dividing by the sum (proportions) head(fmedian(d, f, TRA = "-")) # Groupwise de-median head(ffirst(d, f, TRA = "\%\%")) # Taking modulus of first group-value, etc. ... # Grouped and weighted transformations... head(fsd(d, f, w, "/"), 3) # weighted scaling head(fmedian(d, f, w, "-"), 3) # subtracting the weighted group-median head(fmode(d, f, w, "replace"), 3) # replace with weighted statistical mode ## Some more advanced transformations... head(fbetween(d)) # Averaging (faster t.: fmean(d, TRA = "replace")) head(fwithin(d)) # Centering (faster than: fmean(d, TRA = "-")) head(fwithin(d, f, w)) # Grouped and weighted (same as fmean(d, f, w, "-")) head(fwithin(d, f, w, mean = 5)) # Setting a custom mean head(fwithin(d, f, w, theta = 0.76)) # Quasi-centering i.e. d - theta*fbetween(d, f, w) head(fwithin(d, f, w, mean = "overall.mean")) # Preserving the overall mean of the data head(fscale(d)) # Scaling and centering head(fscale(d, mean = 5, sd = 3)) # Custom scaling and centering head(fscale(d, mean = FALSE, sd = 3)) # Mean preserving scaling head(fscale(d, f, w)) # Grouped and weighted scaling and centering head(fscale(d, f, w, mean = 5, sd = 3)) # Custom grouped and weighted scaling and centering head(fscale(d, f, w, mean = FALSE, # Preserving group means sd = "within.sd")) # and setting group-sd to fsd(fwithin(d, f, w), w = w) head(fscale(d, f, w, mean = "overall.mean", # Full harmonization of group means and variances, sd = "within.sd")) # while preserving the level and scale of the data. head(get_vars(iris, 1:2)) # Use get_vars for fast selecting, gv is shortcut head(fhdbetween(gv(iris, 1:2), gv(iris, 3:5))) # Linear prediction with factors and covariates head(fhdwithin(gv(iris, 1:2), gv(iris, 3:5))) # Linear partialling out factors and covariates ss(iris, 1:10, 1:2) # Similarly fsubset/ss for fast subsetting rows # Simple Time-Computations.. head(flag(AirPassengers, -1:3)) # One lead and three lags head(fdiff(EuStockMarkets, # Suitably lagged first and second differences c(1, frequency(EuStockMarkets)), diff = 1:2)) head(fdiff(EuStockMarkets, rho = 0.87)) # Quasi-differences (x_t - rho*x_t-1) head(fdiff(EuStockMarkets, log = TRUE)) # Log-differences head(fgrowth(EuStockMarkets)) # Exact growth rates (percentage change) head(fgrowth(EuStockMarkets, logdiff = TRUE)) # Log-difference growth rates (percentage change) # Note that it is not necessary to use factors for grouping. fmean(gv(mtcars, -c(2,8:9)), mtcars$cyl) # Can also use vector (internally converted using qF()) fmean(gv(mtcars, -c(2,8:9)), gv(mtcars, c(2,8:9))) # or a list of vector (internally grouped using GRP()) g <- GRP(mtcars, ~ cyl + vs + am) # It is also possible to create grouping objects print(g) # These are instructive to learn about the grouping, plot(g) # and are directly handed down to C++ code fmean(gv(mtcars, -c(2,8:9)), g) # This can speed up multiple computations over same groups fsd(gv(mtcars, -c(2,8:9)), g) # Factors can efficiently be created using qF() f1 <- qF(mtcars$cyl) # Unlike GRP objects, factors are checked for NA's f2 <- qF(mtcars$cyl, na.exclude = FALSE) # This can however be avoided through this option class(f2) # Note the added class \donttest{ % No code relying on suggested package library(microbenchmark) microbenchmark(fmean(mtcars, f1), fmean(mtcars, f2)) # A minor difference, larger on larger data } with(mtcars, finteraction(cyl, vs, am)) # Efficient interactions of vectors and/or factors finteraction(gv(mtcars, c(2,8:9))) # .. or lists of vectors/factors # Simple row- or column-wise computations on matrices or data frames with dapply() dapply(mtcars, quantile) # column quantiles dapply(mtcars, quantile, MARGIN = 1) # Row-quantiles # dapply preserves the data structure of any matrices / data frames passed # Some fast matrix row/column functions are also provided by the matrixStats package # Similarly, BY performs grouped comptations BY(mtcars, f2, quantile) BY(mtcars, f2, quantile, expand.wide = TRUE) # For efficient (grouped) replacing and sweeping out computed statistics, use TRA() sds <- fsd(mtcars) head(TRA(mtcars, sds, "/")) # Simple scaling (if sd's not needed, use fsd(mtcars, TRA = "/")) \donttest{ % No code relying on suggested package microbenchmark(TRA(mtcars, sds, "/"), sweep(mtcars, 2, sds, "/")) # A remarkable performance gain.. } sds <- fsd(mtcars, f2) head(TRA(mtcars, sds, "/", f2)) # Groupd scaling (if sd's not needed: fsd(mtcars, f2, TRA = "/")) # All functions above perserve the structure of matrices / data frames # If conversions are required, use these efficient functions: mtcarsM <- qM(mtcars) # Matrix from data.frame head(qDF(mtcarsM)) # data.frame from matrix columns head(mrtl(mtcarsM, TRUE, "data.frame")) # data.frame from matrix rows, etc.. head(qDT(mtcarsM, "cars")) # Saving row.names when converting matrix to data.table head(qDT(mtcars, "cars")) # Same use a data.frame ## Now let's get some real data and see how we can use this power for data manipulation library(magrittr) head(wlddev) # World Bank World Development Data: 216 countries, 61 years, 5 series (columns 9-13) # Starting with some discriptive tools... namlab(wlddev, class = TRUE) # Show variable names, labels and classes fnobs(wlddev) # Observation count pwnobs(wlddev) # Pairwise observation count head(fnobs(wlddev, wlddev$country)) # Grouped observation count fndistinct(wlddev) # Distinct values descr(wlddev) # Describe data varying(wlddev, ~ country) # Show which variables vary within countries qsu(wlddev, pid = ~ country, # Panel-summarize columns 9 though 12 of this data cols = 9:12, vlabels = TRUE) # (between and within countries) qsu(wlddev, ~ region, ~ country, # Do all of that by region and also compute higher moments cols = 9:12, higher = TRUE) # -> returns a 4D array qsu(wlddev, ~ region, ~ country, cols = 9:12, higher = TRUE, array = FALSE) \%>\% # Return as a list of matrices.. unlist2d(c("Variable","Trans"), row.names = "Region") \%>\% head # and turn into a tidy data.frame pwcor(num_vars(wlddev), P = TRUE) # Pairwise correlations with p-value pwcor(fmean(num_vars(wlddev), wlddev$country), P = TRUE) # Correlating country means pwcor(fwithin(num_vars(wlddev), wlddev$country), P = TRUE) # Within-country correlations psacf(wlddev, ~country, ~year, cols = 9:12) # Panel-data Autocorrelation function pspacf(wlddev, ~country, ~year, cols = 9:12) # Partial panel-autocorrelations psmat(wlddev, ~iso3c, ~year, cols = 9:12) \%>\% plot # Convert panel to 3D array and plot ## collapse offers a few very efficent functions for data manipulation: # Fast selecting and replacing columns series <- get_vars(wlddev, 9:12) # Same as wlddev[9:12] but 2x faster series <- fselect(wlddev, PCGDP:ODA) # Same thing: > 100x faster than dplyr::select get_vars(wlddev, 9:12) <- series # Replace, 8x faster wlddev[9:12] <- series + replaces names fselect(wlddev, PCGDP:ODA) <- series # Same thing # Fast subsetting head(fsubset(wlddev, country == "Ireland", -country, -iso3c)) head(fsubset(wlddev, country == "Ireland" & year > 1990, year, PCGDP:ODA)) ss(wlddev, 1:10, 1:10) # This is an order of magnitude faster than wlddev[1:10, 1:10] # Fast transforming head(ftransform(wlddev, ODA_GDP = ODA / PCGDP, ODA_LIFEEX = sqrt(ODA) / LIFEEX)) settransform(wlddev, ODA_GDP = ODA / PCGDP, ODA_LIFEEX = sqrt(ODA) / LIFEEX) # by reference head(ftransform(wlddev, PCGDP = NULL, ODA = NULL, GINI_sum = fsum(GINI))) head(ftransformv(wlddev, 9:12, log)) # Can also transform with lists of columns head(ftransformv(wlddev, 9:12, fscale, apply = FALSE)) # apply = FALSE invokes fscale.data.frame settransformv(wlddev, 9:12, fscale, apply = FALSE) # Changing the data by reference ftransform(wlddev) <- fscale(gv(wlddev, 9:12)) # Same thing (using replacement method) wlddev \%<>\% ftransformv(9:12, fscale, apply = FALSE) # Same thing, using magrittr wlddev \%>\% ftransform(gv(., 9:12) \%>\% # With compound pipes: Scaling and lagging fscale \%>\% flag(0:2, iso3c, year)) \%>\% head # Fast reordering head(roworder(wlddev, -country, year)) head(colorder(wlddev, country, year)) # Fast renaming head(frename(wlddev, country = Ctry, year = Yr)) setrename(wlddev, country = Ctry, year = Yr) # By reference head(frename(wlddev, tolower, cols = 9:12)) # Fast grouping fgroup_by(wlddev, Ctry, decade) \%>\% fgroup_vars \%>\% head # fgroup_by is faster than dplyr::group_by rm(wlddev) # .. but only works with collapse functions ## Now lets start putting things together wlddev \%>\% fsubset(year > 1990, region, income, PCGDP:ODA) \%>\% fgroup_by(region, income) \%>\% fmean # Fast aggregation using the mean \donttest{ # Same thing using dplyr manipulation verbs library(dplyr) wlddev \%>\% filter(year > 1990) \%>\% select(region, income, PCGDP:ODA) \%>\% group_by(region,income) \%>\% fmean # This is already a lot faster than summarize_all(mean) } wlddev \%>\% fsubset(year > 1990, region, income, PCGDP:POP) \%>\% fgroup_by(region, income) \%>\% fmean(POP) # Weighted group means wlddev \%>\% fsubset(year > 1990, region, income, PCGDP:POP) \%>\% fgroup_by(region, income) \%>\% fsd(POP) # Weighted group standard deviations wlddev \%>\% na_omit(cols = "POP") \%>\% fgroup_by(region, income) \%>\% fselect(PCGDP:POP) \%>\% fnth(0.75, POP) # Weighted group third quartile wlddev \%>\% fgroup_by(country) \%>\% fselect(PCGDP:ODA) \%>\% fwithin \%>\% head # Within transformation wlddev \%>\% fgroup_by(country) \%>\% fselect(PCGDP:ODA) \%>\% fmedian(TRA = "-") \%>\% head # Grouped centering using the median # Replacing data points by the weighted first quartile: wlddev \%>\% na_omit(cols = "POP") \%>\% fgroup_by(country) \%>\% fselect(country, year, PCGDP:POP) \%>\% ftransform(fselect(., -country, -year) \%>\% fnth(0.25, POP, "replace_fill")) \%>\% head wlddev \%>\% fgroup_by(country) \%>\% fselect(PCGDP:ODA) \%>\% fscale \%>\% head # Standardizing wlddev \%>\% fgroup_by(country) \%>\% fselect(PCGDP:POP) \%>\% fscale(POP) \%>\% head # Weigted.. wlddev \%>\% fselect(country, year, PCGDP:ODA) \%>\% # Adding 1 lead and 2 lags of each variable fgroup_by(country) \%>\% flag(-1:2, year) \%>\% head wlddev \%>\% fselect(country, year, PCGDP:ODA) \%>\% # Adding 1 lead and 10-year growth rates fgroup_by(country) \%>\% fgrowth(c(0:1,10), 1, year) \%>\% head # etc... # Aggregation with multiple functions wlddev \%>\% fsubset(year > 1990, region, income, PCGDP:ODA) \%>\% fgroup_by(region, income) \%>\% { add_vars(fgroup_vars(., "unique"), fmedian(., keep.group_vars = FALSE) \%>\% add_stub("median_"), fmean(., keep.group_vars = FALSE) \%>\% add_stub("mean_"), fsd(., keep.group_vars = FALSE) \%>\% add_stub("sd_")) } \%>\% head # Transformation with multiple functions wlddev \%>\% fselect(country, year, PCGDP:ODA) \%>\% fgroup_by(country) \%>\% { add_vars(fdiff(., c(1,10), 1, year) \%>\% flag(0:2, year), # Sequence of lagged differences ftransform(., fselect(., PCGDP:ODA) \%>\% fwithin \%>\% add_stub("W.")) \%>\% flag(0:2, year, keep.ids = FALSE)) # Sequence of lagged demeaned vars } \%>\% head # With ftransform, can also easily do one or more grouped mutations on the fly.. settransform(wlddev, median_ODA = fmedian(ODA, list(region, income), TRA = "replace_fill")) settransform(wlddev, sd_ODA = fsd(ODA, list(region, income), TRA = "replace_fill"), mean_GDP = fmean(PCGDP, country, TRA = "replace_fill")) wlddev \%<>\% ftransform(fmedian(list(median_ODA = ODA, median_GDP = PCGDP), list(region, income), TRA = "replace_fill")) # On a groped data frame it is also possible to grouped transform certain columns # but perform aggregate operatins on others: wlddev \%>\% fgroup_by(region, income) \%>\% ftransform(gmedian_GDP = fmedian(PCGDP, GRP(.), TRA = "replace"), omedian_GDP = fmedian(PCGDP, TRA = "replace"), # "replace" preserves NA's omedian_GDP_fill = fmedian(PCGDP)) \%>\% tail rm(wlddev) ## For multi-type data aggregation, the function collap offers ease and flexibility # Aggregate this data by country and decade: Numeric columns with mean, categorical with mode head(collap(wlddev, ~ country + decade, fmean, fmode)) # taking weighted mean and weighted mode: head(collap(wlddev, ~ country + decade, fmean, fmode, w = ~ POP, wFUN = fsum)) # Multi-function aggregation of certain columns head(collap(wlddev, ~ country + decade, list(fmean, fmedian, fsd), list(ffirst, flast), cols = c(3,9:12))) # Customized Aggregation: Assign columns to functions head(collap(wlddev, ~ country + decade, custom = list(fmean = 9:10, fsd = 9:12, flast = 3, ffirst = 6:8))) # For grouped data frames use collapg wlddev \%>\% fsubset(year > 1990, country, region, income, PCGDP:ODA) \%>\% fgroup_by(country) \%>\% collapg(fmean, ffirst) \%>\% ftransform(AMGDP = PCGDP > fmedian(PCGDP, list(region, income), TRA = "replace_fill"), AMODA = ODA > fmedian(ODA, income, TRA = "replace_fill")) \%>\% head ## Additional flexibility for data transformation tasks is offerend by tidy transformation operators # Within-transformation (centering on overall mean) head(W(wlddev, ~ country, cols = 9:12, mean = "overall.mean")) \donttest{ # Partialling out country and year fixed effects head(HDW(wlddev, PCGDP + LIFEEX ~ qF(country) + qF(year))) # Same, adding ODA as continuous regressor head(HDW(wlddev, PCGDP + LIFEEX ~ qF(country) + qF(year) + ODA)) } # Standardizing (scaling and centering) by country head(STD(wlddev, ~ country, cols = 9:12)) # Computing 1 lead and 3 lags of the 4 series head(L(wlddev, -1:3, ~ country, ~year, cols = 9:12)) # Computing the 1- and 10-year first differences head(D(wlddev, c(1,10), 1, ~ country, ~year, cols = 9:12)) head(D(wlddev, c(1,10), 1:2, ~ country, ~year, cols = 9:12)) # ..first and second differences # Computing the 1- and 10-year growth rates head(G(wlddev, c(1,10), 1, ~ country, ~year, cols = 9:12)) # Adding growth rate variables to dataset add_vars(wlddev) <- G(wlddev, c(1, 10), 1, ~ country, ~year, cols = 9:12, keep.ids = FALSE) get_vars(wlddev, "G1.", regex = TRUE) <- NULL # Deleting again \donttest{ % No code relying on suggested package # These operators can conveniently be used in regression formulas: # Using a Mundlak (1978) procedure to estimate the effect of OECD on LIFEEX, controlling for PCGDP lm(LIFEEX ~ log(PCGDP) + OECD + B(log(PCGDP), country), wlddev \%>\% fselect(country, OECD, PCGDP, LIFEEX) \%>\% na_omit) # Adding 10-year lagged life-expectancy to allow for some convergence effects (dynamic panel model) lm(LIFEEX ~ L(LIFEEX, 10, country) + log(PCGDP) + OECD + B(log(PCGDP), country), wlddev \%>\% fselect(country, OECD, PCGDP, LIFEEX) \%>\% na_omit) # Tranformation functions and operators also support plm panel data classes: library(plm) pwlddev <- pdata.frame(wlddev, index = c("country","year")) head(W(pwlddev$PCGDP)) # Country-demeaning head(W(pwlddev, cols = 9:12)) head(W(pwlddev$PCGDP, effect = 2)) # Time-demeaning head(W(pwlddev, effect = 2, cols = 9:12)) head(HDW(pwlddev$PCGDP)) # Country- and time-demeaning head(HDW(pwlddev, cols = 9:12)) head(STD(pwlddev$PCGDP)) # Standardizing by country head(STD(pwlddev, cols = 9:12)) head(L(pwlddev$PCGDP, -1:3)) # Panel-lags head(L(pwlddev, -1:3, 9:12)) head(G(pwlddev$PCGDP)) # Panel-Growth rates head(G(pwlddev, 1, 1, 9:12)) rm(pwlddev) } # Remove all objects used in this example section rm(v, d, w, f, f1, f2, g, mtcarsM, sds, series, wlddev) } \keyword{package} \keyword{manip} collapse/man/descr.Rd0000644000176200001440000001252114167161061014222 0ustar liggesusers\name{descr} \alias{descr} \alias{print.descr} \alias{as.data.frame.descr} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Detailed Statistical Description of Data Frame } \description{ \code{descr} offers a concise description of each variable in a data frame. It is built as a wrapper around \code{\link{qsu}}, but by default also computes frequency tables with percentages for categorical variables, and quantiles and the number of distinct values for numeric variables (next to the mean, sd, min, max, skewness and kurtosis computed by \code{qsu}). } \usage{ descr(X, Ndistinct = TRUE, higher = TRUE, table = TRUE, Qprobs = c(0.01, 0.05, 0.25, 0.5, 0.75, 0.95, 0.99), cols = NULL, label.attr = "label", \dots) \method{print}{descr}(x, n = 7, perc = TRUE, digits = 2, t.table = TRUE, summary = TRUE, \dots) \method{as.data.frame}{descr}(x, \dots) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{X}{ a data frame or list of atomic vectors. Atomic vectors, matrices or arrays can be passed but will first be coerced to data frame using \code{\link{qDF}}. } \item{Ndistinct}{ logical. \code{TRUE} (default) computes the number of distinct values on all variables using \code{\link{fndistinct}}. } \item{higher}{ logical. Argument is passed down to \code{\link{qsu}}: \code{TRUE} (default) computes the skewness and the kurtosis. } \item{table}{ logical. \code{TRUE} (default) calls \code{\link{table}} on all categorical variables (excluding \link[=is_date]{Date} variables). } \item{Qprobs}{ double. Probabilities for quantiles to compute on numeric variables, passed down to \code{\link{quantile}}. If something non-numeric is passed (i.e. \code{NULL}, \code{FALSE}, \code{NA}, \code{""} etc.), no quantiles are computed. } \item{cols}{ select columns to describe using column names, indices, a logical vector or a function (e.g. \code{is.numeric}). } \item{label.attr}{ character. The name of a label attribute to display for each variable (if variables are labeled). } \item{\dots}{ other arguments passed to \code{\link{qsu.default}}. } \item{x}{an object of class 'descr'.} \item{n}{integer. The number of first and last entries to display of the table computed for categorical variables. If the number of distinct elements is \code{< 2*n}, the whole table is printed.} \item{perc}{logical. \code{TRUE} (default) adds percentages to the frequencies in the table for categorical variables.} \item{digits}{integer. The number of decimals to print in statistics and percentage tables.} \item{t.table}{logical. \code{TRUE} (default) prints a transposed table.} \item{summary}{logical. \code{TRUE} (default) computes and displays a summary of the frequencies if the size of the table for a categorical variables exceeds \code{2*n}.} } \details{ \code{descr} was heavily inspired by \code{Hmisc::describe}, but computes about 10x faster. The performance is comparable to \code{\link{summary}}. \code{descr} was built as a wrapper around \code{\link{qsu}}, to enrich the set of statistics computed by \code{\link{qsu}} for both numeric and categorical variables. \code{\link{qsu}} itself is yet about 10x faster than \code{descr}, and is optimized for grouped, panel data and weighted statistics. It is possible to also compute grouped, panel data and/or weighted statistics with \code{descr} by passing group-ids to \code{g}, panel-ids to \code{pid} or a weight vector to \code{w}. These arguments are handed down to \code{\link{qsu.default}} and only affect the statistics natively computed by \code{qsu}, i.e. passing a weight vector produces a weighted mean, sd, skewness and kurtosis but not weighted quantiles. The list-object returned from \code{descr} can be converted to a tidy data frame using \code{as.data.frame}. This representation will not include frequency tables computed for categorical variables, and the method cannot handle arrays of statistics (applicable when \code{g} or \code{pid} arguments are passed to \code{descr}, in that case \code{as.data.frame.descr} will throw an appropriate error). } \value{ A 2-level nested list, the top-level containing the statistics computed for each variable, which are themselves stored in a list containing the class, the label, the basic statistics and quantiles / tables computed for the variable. The object is given a class 'descr' and also has the number of observations in the dataset attached as an 'N' attribute, as well as an attribute 'arstat' indicating whether the object contains arrays of statistics, and an attribute 'table' indicating whether \code{table = TRUE} (i.e. the object could contain tables for categorical variables). } \seealso{ \code{\link{qsu}}, \code{\link{pwcor}}, \link[=summary-statistics]{Summary Statistics}, \link[=fast-statistical-functions]{Fast Statistical Functions}, \link[=collapse-documentation]{Collapse Overview} } \examples{ ## Standard Use descr(iris) descr(wlddev) descr(GGDC10S) as.data.frame(descr(wlddev)) ## Passing Arguments down to qsu: For Panel Data Statistics descr(iris, pid = iris$Species) descr(wlddev, pid = wlddev$iso3c) ## Grouped Statistics descr(iris, g = iris$Species) descr(GGDC10S, g = GGDC10S$Region) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{ univar }% use one of RShowDoc("KEYWORDS") collapse/man/fmean.Rd0000644000176200001440000001347014175334642014222 0ustar liggesusers\name{fmean} \alias{fmean} \alias{fmean.default} \alias{fmean.matrix} \alias{fmean.data.frame} \alias{fmean.grouped_df} \title{Fast (Grouped, Weighted) Mean for Matrix-Like Objects} % Vectors, Matrix and Data Frame Columns} \description{ \code{fmean} is a generic function that computes the (column-wise) mean of \code{x}, (optionally) grouped by \code{g} and/or weighted by \code{w}. The \code{\link{TRA}} argument can further be used to transform \code{x} using its (grouped, weighted) mean. } \usage{ fmean(x, \dots) \method{fmean}{default}(x, g = NULL, w = NULL, TRA = NULL, na.rm = TRUE, use.g.names = TRUE, \dots) \method{fmean}{matrix}(x, g = NULL, w = NULL, TRA = NULL, na.rm = TRUE, use.g.names = TRUE, drop = TRUE, \dots) \method{fmean}{data.frame}(x, g = NULL, w = NULL, TRA = NULL, na.rm = TRUE, use.g.names = TRUE, drop = TRUE, \dots) \method{fmean}{grouped_df}(x, w = NULL, TRA = NULL, na.rm = TRUE, use.g.names = FALSE, keep.group_vars = TRUE, keep.w = TRUE, \dots) } \arguments{ \item{x}{a numeric vector, matrix, data frame or grouped data frame (class 'grouped_df').} \item{g}{a factor, \code{\link{GRP}} object, atomic vector (internally converted to factor) or a list of vectors / factors (internally converted to a \code{\link{GRP}} object) used to group \code{x}.} \item{w}{a numeric vector of (non-negative) weights, may contain missing values.} \item{TRA}{an integer or quoted operator indicating the transformation to perform: 1 - "replace_fill" | 2 - "replace" | 3 - "-" | 4 - "-+" | 5 - "/" | 6 - "\%" | 7 - "+" | 8 - "*" | 9 - "\%\%" | 10 - "-\%\%". See \code{\link{TRA}}.} \item{na.rm}{logical. Skip missing values in \code{x}. Defaults to \code{TRUE} and implemented at very little computational cost. If \code{na.rm = FALSE} a \code{NA} is returned when encountered.} \item{use.g.names}{logical. Make group-names and add to the result as names (default method) or row-names (matrix and data frame methods). No row-names are generated for \emph{data.table}'s.} \item{drop}{\emph{matrix and data.frame method:} Logical. \code{TRUE} drops dimensions and returns an atomic vector if \code{g = NULL} and \code{TRA = NULL}.} \item{keep.group_vars}{\emph{grouped_df method:} Logical. \code{FALSE} removes grouping variables after computation.} \item{keep.w}{\emph{grouped_df method:} Logical. Retain summed weighting variable after computation (if contained in \code{grouped_df}).} \item{\dots}{arguments to be passed to or from other methods.} } \details{ % Non-grouped mean computations internally utilize long-doubles in C++, for additional numeric precision. Missing-value removal as controlled by the \code{na.rm} argument is done very efficiently by simply skipping them in the computation (thus setting \code{na.rm = FALSE} on data with no missing values doesn't give extra speed). Large performance gains can nevertheless be achieved in the presence of missing values if \code{na.rm = FALSE}, since then the corresponding computation is terminated once a \code{NA} is encountered and \code{NA} is returned (unlike \code{\link{mean}} which just runs through without any checks). The weighted mean is computed as \code{sum(x * w) / sum(w)}. If \code{na.rm = TRUE}, missing values will be removed from both \code{x} and \code{w} i.e. utilizing only \code{x[complete.cases(x,w)]} and \code{w[complete.cases(x,w)]}. This all seamlessly generalizes to grouped computations, which are performed in a single pass (without splitting the data) and therefore extremely fast. When applied to data frames with groups or \code{drop = FALSE}, \code{fmean} preserves all column attributes (such as variable labels) but does not distinguish between classed and unclassed object (thus applying \code{fmean} to a factor column will give a 'malformed factor' error). The attributes of the data frame itself are also preserved. } \value{ The (\code{w} weighted) mean of \code{x}, grouped by \code{g}, or (if \code{\link{TRA}} is used) \code{x} transformed by its mean, grouped by \code{g}. } \seealso{ \code{\link{fmedian}}, \code{\link{fmode}}, \link[=fast-statistical-functions]{Fast Statistical Functions}, \link[=collapse-documentation]{Collapse Overview} } \examples{ ## default vector method mpg <- mtcars$mpg fmean(mpg) # Simple mean fmean(mpg, w = mtcars$hp) # Weighted mean: Weighted by hp fmean(mpg, TRA = "-") # Simple transformation: demeaning (See also ?W) fmean(mpg, mtcars$cyl) # Grouped mean fmean(mpg, mtcars[8:9]) # another grouped mean. g <- GRP(mtcars[c(2,8:9)]) fmean(mpg, g) # Pre-computing groups speeds up the computation fmean(mpg, g, mtcars$hp) # Grouped weighted mean fmean(mpg, g, TRA = "-") # Demeaning by group fmean(mpg, g, mtcars$hp, "-") # Group-demeaning using weighted group means ## data.frame method fmean(mtcars) fmean(mtcars, g) fmean(fgroup_by(mtcars, cyl, vs, am)) # Another way of doing it.. head(fmean(mtcars, g, TRA = "-")) # etc.. ## matrix method m <- qM(mtcars) fmean(m) fmean(m, g) head(fmean(m, g, TRA = "-")) # etc.. \donttest{ % No code relying on suggested package ## method for grouped data frames - created with dplyr::group_by or fgroup_by library(dplyr) mtcars \%>\% group_by(cyl,vs,am) \%>\% fmean() # Ordinary mtcars \%>\% group_by(cyl,vs,am) \%>\% fmean(hp) # Weighted mtcars \%>\% group_by(cyl,vs,am) \%>\% fmean(hp, "-") # Weighted Transform mtcars \%>\% group_by(cyl,vs,am) \%>\% select(mpg,hp) \%>\% fmean(hp, "-") # Only mpg mtcars \%>\% fgroup_by(cyl,vs,am) \%>\% # Equivalent and faster ! fselect(mpg,hp) \%>\% fmean(hp, "-") } } \keyword{univar} \keyword{manip} collapse/man/rapply2d.Rd0000644000176200001440000000276514167160635014676 0ustar liggesusers\name{rapply2d} \alias{rapply2d} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Recursively Apply a Function to a List of Data Objects } \description{ \code{rapply2d} is a recursive version of \code{lapply} with two key differences to \code{\link{rapply}}: (1) Data frames are considered as final objects, not as (sub-)lists, and (2) the result is never simplified / unlisted. } \usage{ rapply2d(l, FUN, \dots, classes = "data.frame") } %- maybe also 'usage' for other objects documented here. \arguments{ \item{l}{a list.} \item{FUN}{a function that can be applied to all elements in l.} \item{\dots}{additional elements passed to FUN.} \item{classes}{character. These are classes of list-based objects inside \code{l} which \code{FUN} should be applied to. Note that \code{FUN} is also applied to all non-list elements in \code{l}. It is thus quite different from the \code{classes} argument to \code{\link{rapply}}.} } \value{ A list of the same structure as \code{l}, where \code{FUN} was applied to all final (atomic) elements and list-based objects of a class included in \code{classes}. } %% ~Make other sections like Warning with \section{Warning }{\dots.} ~ \seealso{ \code{\link{rsplit}}, \code{\link{unlist2d}}, \link[=list-processing]{List Processing}, \link[=collapse-documentation]{Collapse Overview} } \examples{ l <- list(mtcars, list(mtcars, as.matrix(mtcars))) rapply2d(l, fmean) unlist2d(rapply2d(l, fmean)) } \keyword{manip} \keyword{list} collapse/man/select_replace_vars.Rd0000644000176200001440000002302114201327077017124 0ustar liggesusers\name{fselect-get_vars-add_vars} % \name{select-replace-vars} % \alias{select-replace-vars} \alias{fselect} \alias{fselect<-} \alias{slt} \alias{slt<-} \alias{get_vars} \alias{gv} \alias{gvr} \alias{num_vars} \alias{nv} \alias{cat_vars} \alias{char_vars} \alias{fact_vars} \alias{logi_vars} \alias{date_vars} \alias{add_vars} \alias{av} \alias{get_vars<-} \alias{gv<-} \alias{gvr<-} \alias{num_vars<-} \alias{nv<-} \alias{cat_vars<-} \alias{char_vars<-} \alias{fact_vars<-} \alias{logi_vars<-} \alias{date_vars<-} \alias{add_vars<-} \alias{av<-} \title{Fast Select, Replace or Add Data Frame Columns} \description{ Efficiently select and replace (or add) a subset of columns from (to) a data frame. This can be done by data type, or using expressions, column names, indices, logical vectors, selector functions or regular expressions matching column names. } \usage{ ## Select and replace variables, analgous to dplyr::select but significantly faster fselect(.x, \dots, return = "data") fselect(x, \dots) <- value slt(.x, \dots, return = "data") # Shortcut for fselect slt(x, \dots) <- value # Shortcut for fselect<- ## Select and replace columns by names, indices, logical vectors, ## regular expressions or using functions to identify columns get_vars(x, vars, return = "data", regex = FALSE, \dots) gv(x, vars, return = "data", \dots) # Shortcut for get_vars gvr(x, vars, return = "data", \dots) # Shortcut for get_vars(\dots, regex = TRUE) get_vars(x, vars, regex = FALSE, \dots) <- value gv(x, vars, \dots) <- value # Shortcut for get_vars<- gvr(x, vars, \dots) <- value # Shortcut for get_vars<-(\dots, regex = TRUE) ## Add columns at any position within a data.frame add_vars(x, \dots, pos = "end") add_vars(x, pos = "end") <- value av(x, \dots, pos = "end") # Shortcut for add_vars av(x, pos = "end") <- value # Shortcut for add_vars<- ## Select and replace columns by data type num_vars(x, return = "data") num_vars(x) <- value nv(x, return = "data") # Shortcut for num_vars nv(x) <- value # Shortcut for num_vars<- cat_vars(x, return = "data") # Categorical variables, see is_categorical cat_vars(x) <- value char_vars(x, return = "data") char_vars(x) <- value fact_vars(x, return = "data") fact_vars(x) <- value logi_vars(x, return = "data") logi_vars(x) <- value date_vars(x, return = "data") # See is_date date_vars(x) <- value } \arguments{ \item{x, .x}{a data frame or list.} \item{value}{a data frame or list of columns whose dimensions exactly match those of the extracted subset of \code{x}. If only 1 variable is in the subset of \code{x}, \code{value} can also be an atomic vector or matrix, provided that \code{NROW(value) == nrow(x)}.} \item{vars}{a vector of column names, indices (can be negative), a suitable logical vector, or a vector of regular expressions matching column names (if \code{regex = TRUE}). It is also possible to pass a function returning \code{TRUE} or \code{FALSE} when applied to the columns of \code{x}.} \item{return}{an integer or string specifying what the selector function should return. The options are: \tabular{lllll}{\emph{ Int. } \tab\tab \emph{ String } \tab\tab \emph{ Description } \cr 1 \tab\tab "data" \tab\tab subset of data frame (default) \cr 2 \tab\tab "names" \tab\tab column names \cr 3 \tab\tab "indices" \tab\tab column indices \cr 4 \tab\tab "named_indices" \tab\tab named column indices \cr 5 \tab\tab "logical" \tab\tab logical selection vector \cr 6 \tab\tab "named_logical" \tab\tab named logical vector \cr } \emph{Note}: replacement functions only replace data, However column names are replaced together with the data (if available). } \item{regex}{logical. \code{TRUE} will do regular expression search on the column names of \code{x} using a (vector of) regular expression(s) passed to \code{vars}. Matching is done using \code{\link{grep}}.} \item{pos}{the position where columns are added in the data frame. \code{"end"} (default) will append the data frame at the end (right) side. "front" will add columns in front (left). Alternatively one can pass a vector of positions (matching \code{length(value)} if value is a list). In that case the other columns will be shifted around the new ones while maintaining their order. } \item{\dots}{for \code{fselect}: column names and expressions e.g. \code{fselect(mtcars, newname = mpg, hp, carb:vs)}. for \code{get_vars}: further arguments passed to \code{\link{grep}}, if \code{regex = TRUE}. For \code{add_vars}: Same as \code{value}, a single argument passed may also be a vector or matrix, multiple arguments must each be a list (they are combined using \code{c(\dots)}).} } \details{ \code{get_vars(<-)} is around 2x faster than \code{`[.data.frame`} and 8x faster than \code{`[<-.data.frame`}, so the common operation \code{data[cols] <- someFUN(data[cols])} can be made 10x more efficient (abstracting from computations performed by \code{someFUN}) using \code{get_vars(data, cols) <- someFUN(get_vars(data, cols))} or the shorthand \code{gv(data, cols) <- someFUN(gv(data, cols))}. Similarly type-wise operations like \code{data[sapply(data, is.numeric)]} or \code{data[sapply(data, is.numeric)] <- value} are facilitated and more efficient using \code{num_vars(data)} and \code{num_vars(data) <- value} or the shortcuts \code{nv} and \code{nv<-} etc. \code{fselect} provides an efficient alternative to \code{dplyr::select}, allowing the selection of variables based on expressions evaluated within the data frame, see Examples. It is about 100x faster than \code{dplyr::select} but also more simple as it does not provide special methods for grouped tibbles. Finally, \code{add_vars(data1, data2, data3, \dots)} is a lot faster than \code{cbind(data1, data2, data3, \dots)}, and preserves the attributes of \code{data1} (i.e. it is like adding columns to \code{data1}). The replacement function \code{add_vars(data) <- someFUN(get_vars(data, cols))} efficiently appends \code{data} with computed columns. The \code{pos} argument allows adding columns at positions other than the end (right) of the data frame, see Examples. All functions introduced here perform their operations class-independent. They all basically work like this: (1) save the attributes of \code{x}, (2) unclass \code{x}, (3) subset, replace or append \code{x} as a list, (4) modify the "names" component of the attributes of \code{x} accordingly and (5) efficiently attach the attributes again to the result from step (3). Thus they can freely be applied to data.table's, grouped tibbles, panel data frames and other classes and will return an object of exactly the same class and the same attributes. % secure w.r.t. redefinitions of \code{`[.data.frame`} or \code{`[<-.data.frame`} for other classes (i.e. data.table's, tibbles etc.) and preserve all attributes of the data } \note{ The functions here only check the length of the first column, which is one of the reasons why they are so fast. When lists of unequal-length columns are offered as replacements this yields a malformed data frame (which will also print a warning in the console i.e. you will notice that). } \seealso{ \code{\link{fsubset}}, \code{\link{ftransform}}, \link[=fast-data-manipulation]{Data Frame Manipulation}, \link[=collapse-documentation]{Collapse Overview} } \examples{ ## Wold Development Data head(fselect(wlddev, Country = country, Year = year, ODA)) # Fast dplyr-like selecting head(fselect(wlddev, -country, -year, -PCGDP)) head(fselect(wlddev, country, year, PCGDP:ODA)) head(fselect(wlddev, -(PCGDP:ODA))) fselect(wlddev, country, year, PCGDP:ODA) <- NULL # Efficient deleting head(wlddev) rm(wlddev) head(num_vars(wlddev)) # Select numeric variables head(cat_vars(wlddev)) # Select categorical (non-numeric) vars head(get_vars(wlddev, is_categorical)) # Same thing num_vars(wlddev) <- num_vars(wlddev) # Replace Numeric Variables by themselves get_vars(wlddev,is.numeric) <- get_vars(wlddev,is.numeric) # Same thing head(get_vars(wlddev, 9:12)) # Select columns 9 through 12, 2x faster head(get_vars(wlddev, -(9:12))) # All except columns 9 through 12 head(get_vars(wlddev, c("PCGDP","LIFEEX","GINI","ODA"))) # Select using column names head(get_vars(wlddev, "[[:upper:]]", regex = TRUE)) # Same thing: match upper-case var. names head(gvr(wlddev, "[[:upper:]]")) # Same thing get_vars(wlddev, 9:12) <- get_vars(wlddev, 9:12) # 9x faster wlddev[9:12] <- wlddev[9:12] add_vars(wlddev) <- STD(gv(wlddev,9:12), wlddev$iso3c) # Add Standardized columns 9 through 12 head(wlddev) # gv and av are shortcuts get_vars(wlddev, 14:17) <- NULL # Efficient Deleting added columns again av(wlddev, "front") <- STD(gv(wlddev,9:12), wlddev$iso3c) # Again adding in Front head(wlddev) get_vars(wlddev, 1:4) <- NULL # Deleting av(wlddev,c(10,12,14,16)) <- W(wlddev,~iso3c, cols = 9:12, # Adding next to original variables keep.by = FALSE) head(wlddev) get_vars(wlddev, c(10,12,14,16)) <- NULL # Deleting } \keyword{manip} collapse/man/fast-statistical-functions.Rd0000644000176200001440000002033414175334642020416 0ustar liggesusers\name{fast-statistical-functions} \alias{A1-fast-statistical-functions} \alias{fast-statistical-functions} \alias{.FAST_STAT_FUN} \alias{.FAST_FUN} \title{Fast (Grouped, Weighted) Statistical Functions for Matrix-Like Objects} % Vectors, Matrix and Data Frame Columns} \description{ With \code{\link{fsum}}, \code{\link{fprod}}, \code{\link{fmean}}, \code{\link{fmedian}}, \code{\link{fmode}}, \code{\link{fvar}}, \code{\link{fsd}}, \code{\link{fmin}}, \code{\link{fmax}}, \code{\link{fnth}}, \code{\link{ffirst}}, \code{\link{flast}}, \code{\link{fnobs}} and \code{\link{fndistinct}}, \emph{collapse} presents a coherent set of extremely fast and flexible statistical functions (S3 generics) to perform column-wise, grouped and weighted computations on atomic vectors, matrices and data frames, with special support for grouped data frames / tibbles (\emph{dplyr}) and \emph{data.table}'s. } \section{Usage}{\preformatted{ ## All functions (FUN) follow a common syntax in 4 methods: FUN(x, ...) ## Default S3 method: FUN(x, g = NULL, [w = NULL,] TRA = NULL, [na.rm = TRUE,] use.g.names = TRUE, ...) ## S3 method for class 'matrix' FUN(x, g = NULL, [w = NULL,] TRA = NULL, [na.rm = TRUE,] use.g.names = TRUE, drop = TRUE, ...) ## S3 method for class 'data.frame' FUN(x, g = NULL, [w = NULL,] TRA = NULL, [na.rm = TRUE,] use.g.names = TRUE, drop = TRUE, ...) ## S3 method for class 'grouped_df' FUN(x, [w = NULL,] TRA = NULL, [na.rm = TRUE,] use.g.names = FALSE, keep.group_vars = TRUE, [keep.w = TRUE,] ...) } } \section{Arguments}{ \tabular{lll}{ \code{x} \tab \tab a vector, matrix, data frame or grouped data frame (class 'grouped_df'). \cr \code{g} \tab \tab a factor, \code{\link{GRP}} object, atomic vector (internally converted to factor) or a list of vectors / factors (internally converted to a \code{\link{GRP}} object) used to group \code{x}. \cr \code{w} \tab \tab a numeric vector of (non-negative) weights, may contain missing values. Supported by \code{\link{fsum}}, \code{\link{fprod}}, \code{\link{fmean}}, \code{\link{fmedian}}, \code{\link{fnth}}, \code{\link{fvar}}, \code{\link{fsd}} and \code{\link{fmode}}. \cr \code{TRA} \tab \tab an integer or quoted operator indicating the transformation to perform: 1 - "replace_fill" | 2 - "replace" | 3 - "-" | 4 - "-+" | 5 - "/" | 6 - "\%" | 7 - "+" | 8 - "*" | 9 - "\%\%" | 10 - "-\%\%". See \code{\link{TRA}}. \cr \code{na.rm} \tab \tab logical. Skip missing values in \code{x}. Defaults to \code{TRUE} in all functions and implemented at very little computational cost. Not available for \code{\link{fnobs}}. \cr \code{use.g.names} \tab \tab logical. Make group-names and add to the result as names (default method) or row-names (matrix and data frame methods). No row-names are generated for \emph{data.table}'s. \cr \code{drop} \tab \tab \emph{matrix and data.frame methods:} Logical. \code{TRUE} drops dimensions and returns an atomic vector if \code{g = NULL} and \code{TRA = NULL}. \cr \code{keep.group_vars} \tab \tab \emph{grouped_df method:} Logical. \code{FALSE} removes grouping variables after computation. By default grouping variables are added, even if not present in the grouped_df. \cr \code{keep.w} \tab \tab \emph{grouped_df method:} Logical. \code{TRUE} (default) also aggregates weights and saves them in a column, \code{FALSE} removes weighting variable after computation (if contained in \code{grouped_df}). \cr \code{\dots} \tab \tab arguments to be passed to or from other methods, and extra arguments to some functions, i.e. the algorithm used to compute variances etc. \cr } } \section{Details}{ Please see the documentation of individual functions. } \section{Value}{ \code{x} suitably aggregated or transformed. Data frame column-attributes and overall attributes are preserved. } \section{Notes}{ \itemize{ \item Panel-decomposed (i.e. between and within) statistics as well as grouped and weighted skewness and kurtosis are implemented in \code{\link{qsu}}. \item The vector-valued functions and operators \code{\link{fcumsum}}, \code{\link[=fscale]{fscale/STD}}, \code{\link[=fbetween]{fbetween/B}}, \code{\link[=fhdbetween]{fhdbetween/HDB}}, \code{\link[=fwithin]{fwithin/W}}, \code{\link[=fhdwithin]{fhdwithin/HDW}}, \code{\link[=flag]{flag/L/F}}, \code{\link[=fdiff]{fdiff/D/Dlog}} and \code{\link[=fgrowth]{fgrowth/G}} are documented under \link[=data-transformations]{Data Transformations} and \link[=time-series-panel-series]{Time Series and Panel Series}. These functions also support \code{plm::pseries} and \code{plm::pdata.frame}'s. } } \seealso{ \link[=collapse-documentation]{Collapse Overview}, \link[=data-transformations]{Data Transformations}, \link[=time-series-panel-series]{Time Series and Panel Series} } \section{Examples}{\preformatted{ ## default vector method mpg <- mtcars$mpg fsum(mpg) # Simple sum fsum(mpg, TRA = "/") # Simple transformation: divide all values by the sum fsum(mpg, mtcars$cyl) # Grouped sum fmean(mpg, mtcars$cyl) # Grouped mean fmean(mpg, w = mtcars$hp) # Weighted mean, weighted by hp fmean(mpg, mtcars$cyl, mtcars$hp) # Grouped mean, weighted by hp fsum(mpg, mtcars$cyl, TRA = "/") # Proportions / division by group sums fmean(mpg, mtcars$cyl, mtcars$hp, # Subtract weighted group means, see also ?fwithin TRA = "-") ## data.frame method fsum(mtcars) fsum(mtcars, TRA = "\%") # This computes percentages fsum(mtcars, mtcars[c(2,8:9)]) # Grouped column sum g <- GRP(mtcars, ~ cyl + vs + am) # Here precomputing the groups! fsum(mtcars, g) # Faster !! fmean(mtcars, g, mtcars$hp) fmean(mtcars, g, mtcars$hp, "-") # Demeaning by weighted group means.. fmean(fgroup_by(mtcars, cyl, vs, am), hp, "-") # Another way of doing it.. fmode(wlddev, drop = FALSE) # Compute statistical modes of variables in this data fmode(wlddev, wlddev$income) # Grouped statistical modes .. ## matrix method m <- qM(mtcars) fsum(m) fsum(m, g) # .. \donttest{ ## method for grouped data frames - created with dplyr::group_by or fgroup_by library(dplyr) mtcars \%>\% group_by(cyl,vs,am) \%>\% select(mpg,carb) \%>\% fsum() mtcars \%>\% fgroup_by(cyl,vs,am) \%>\% fselect(mpg,carb) \%>\% fsum() # equivalent and faster !! mtcars \%>\% fgroup_by(cyl,vs,am) \%>\% fsum(TRA = "\%") mtcars \%>\% fgroup_by(cyl,vs,am) \%>\% fmean(hp) # weighted grouped mean, save sum of weights mtcars \%>\% fgroup_by(cyl,vs,am) \%>\% fmean(hp, keep.group_vars = FALSE) } } } \section{Benchmark}{\preformatted{ ## This compares fsum with data.table (2 threads) and base::rowsum # Starting with small data mtcDT <- qDT(mtcars) f <- qF(mtcars$cyl) library(microbenchmark) microbenchmark(mtcDT[, lapply(.SD, sum), by = f], rowsum(mtcDT, f, reorder = FALSE), fsum(mtcDT, f, na.rm = FALSE), unit = "relative") expr min lq mean median uq max neval cld mtcDT[, lapply(.SD, sum), by = f] 145.436928 123.542134 88.681111 98.336378 71.880479 85.217726 100 c rowsum(mtcDT, f, reorder = FALSE) 2.833333 2.798203 2.489064 2.937889 2.425724 2.181173 100 b fsum(mtcDT, f, na.rm = FALSE) 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 100 a # Now larger data tdata <- qDT(replicate(100, rnorm(1e5), simplify = FALSE)) # 100 columns with 100.000 obs f <- qF(sample.int(1e4, 1e5, TRUE)) # A factor with 10.000 groups microbenchmark(tdata[, lapply(.SD, sum), by = f], rowsum(tdata, f, reorder = FALSE), fsum(tdata, f, na.rm = FALSE), unit = "relative") expr min lq mean median uq max neval cld tdata[, lapply(.SD, sum), by = f] 2.646992 2.975489 2.834771 3.081313 3.120070 1.2766475 100 c rowsum(tdata, f, reorder = FALSE) 1.747567 1.753313 1.629036 1.758043 1.839348 0.2720937 100 b fsum(tdata, f, na.rm = FALSE) 1.000000 1.000000 1.000000 1.000000 1.000000 1.0000000 100 a } } \keyword{univar} \keyword{manip} \keyword{documentation} collapse/man/seqid.Rd0000644000176200001440000001432114167156135014235 0ustar liggesusers\name{seqid} \alias{seqid} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Generate Group-Id from Integer Sequences } \description{ \code{seqid} can be used to group sequences of integers in a vector, e.g. \code{seqid(c(1:3, 5:7))} becomes \code{c(rep(1,3), rep(2,3))}. It also supports increments \code{> 1}, unordered sequences, and missing values in the sequence. Some applications are to facilitate identification of, and grouped operations on, (irregular) time series and panels. } \usage{ seqid(x, o = NULL, del = 1L, start = 1L, na.skip = FALSE, skip.seq = FALSE, check.o = TRUE) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{x}{a factor or integer vector. Numeric vectors will be converted to integer i.e. rounded downwards.} \item{o}{an (optional) integer ordering vector specifying the order by which to pass through \code{x}.} \item{del}{integer. The integer deliminating two consecutive points in a sequence. \code{del = 1} lets \code{seqid} track sequences of the form \code{c(1,2,3,..)}, \code{del = 2} tracks sequences \code{c(1,3,5,..)} etc.} \item{start}{integer. The starting value of the resulting sequence id. Default is starting from 1. For C++ programmers, starting from 0 could be a better choice. } \item{na.skip}{logical. Skip missing values in the sequence. The default behavior is skipping such that \code{seqid(c(1, NA, 2))} is regarded as one sequence and coded as \code{c(1, NA, 1)}.} \item{skip.seq}{logical. If \code{na.skip = TRUE}, this changes the behavior such that missing values are viewed as part of the sequence, i.e. \code{seqid(c(1, NA, 3))} is regarded as one sequence and coded as \code{c(1, NA, 1)}.} \item{check.o}{logical. Programmers option: \code{FALSE} prevents checking that each element of \code{o} is in the range \code{[1, length(x)]}, it only checks the length of \code{o}. This gives some extra speed, but will terminate R if any element of \code{o} is too large or too small. } } \details{ \code{seqid} was created primarily as a workaround to deal with problems of computing lagged values, differences and growth rates on irregularly spaced time series and panels before \emph{collapse} version 1.5.0 (\href{https://github.com/SebKrantz/collapse/issues/26}{#26}). Now \code{flag}, \code{fdiff} and \code{fgrowth} natively support irregular data so this workaround is superfluous, except for iterated differencing which is not yet supported with irregular data. % panels because they do not pre-compute an ordering of the data but directly compute the ordering from the supplied id and time variables while providing errors for gaps and repeated time values. see \code{\link{flag}} for computational details. The theory of the workaround was to express an irregular time series or panel series as a regular panel series with a group-id created such that the time-periods within each group are consecutive. \code{seqid} makes this very easy: For an irregular panel with some gaps or repeated values in the time variable, an appropriate id variable can be generated using \code{settransform(data, newid = seqid(time, radixorder(id, time)))}. Lags can then be computed using \code{L(data, 1, ~newid, ~time)} etc. %A simple solution to applying existing functionality (\code{flag}, \code{fdiff} and \code{fgrowth}) to irregular time series and panels is thus to create a group-id that fully identifies the data together with the time variable. % This way \emph{collapse} maintains a balance between offering very fast computations on regular time series and panels (which may be unbalanced but where observations for each entity are consecutive in time), and flexibility of application. In general, for any regularly spaced panel the identity given by \code{identical(groupid(id, order(id, time)), seqid(time, order(id, time)))} should hold. Regularly spaced panels with gaps in time (such as a panel-survey with measurements every 2 years) can be handled either by \code{seqid(\dots, del = gap)} or, in most cases, simply by converting the time variable to factor using \code{\link{qF}}, which will make observations consecutive. % \enumerate{ % \item Sort the data in ascending order (e.g. using \code{data.table::setorder(data, time)} for time series and \code{data.table::setorder(data, id, time)} for panels) % \item Generate a new id variable using \code{seqid} (e.g. \code{settransform(data, newid = seqid(time))}) % \item Use the new id to identify the data together with the time variable (e.g. compute a panel-lag using \code{L(data, 1, ~newid, ~time)} or create a panel data frame: \code{pdata <- plm::pdata.frame(data, index = c("newid", "time")); L(pdata)}) % } There are potentially other more analytical applications for \code{seqid}\dots For the opposite operation of creating a new time-variable that is consecutive in each group, see \code{data.table::rowid}. } \value{ An integer vector of class 'qG'. See \code{\link{qG}}. } \seealso{ \code{\link{groupid}}, \code{\link{qG}}, \link[=fast-grouping-ordering]{Fast Grouping and Ordering}, \link[=collapse-documentation]{Collapse Overview} } \examples{ ## This creates an irregularly spaced panel, with a gap in time for id = 2 data <- data.frame(id = rep(1:3, each = 4), time = c(1:4, 1:2, 4:5, 1:4), value = rnorm(12)) data ## This gave a gaps in time error previous to collapse 1.5.0 L(data, 1, value ~ id, ~time) ## Generating new id variable (here seqid(time) would suffice as data is sorted) settransform(data, newid = seqid(time, order(id, time))) data ## Lag the panel this way L(data, 1, value ~ newid, ~time) \donttest{ ## A different possibility: Creating a consecutive time variable settransform(data, newtime = data.table::rowid(id)) data L(data, 1, value ~ id, ~newtime) } ## With sorted data, the time variable can also just be omitted.. L(data, 1, value ~ id) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory (show via RShowDoc("KEYWORDS")): \keyword{ts} \keyword{manip} % Use only one keyword per line. % For non-standard keywords, use \concept instead of \keyword: % \concept{ ~cpt1 } % \concept{ ~cpt2 } % Use only one concept per line. collapse/man/fast-grouping.Rd0000644000176200001440000001471114172367040015713 0ustar liggesusers\name{fast-grouping-ordering} \alias{A2-fast-grouping-ordering} \alias{fast-grouping-ordering} \title{Fast Grouping and Ordering} % \emph{collapse} \description{ \emph{collapse} provides the following functions to efficiently group and order data: \itemize{ \item \code{\link{radixorder}}, provides fast radix-ordering through direct access to the method \code{\link[=order]{order(..., method = "radix")}}, as well as the possibility to return some attributes very useful for grouping data and finding unique elements. \code{\link{radixorderv}} exists as a programmers alternative. The function \code{\link[=roworder]{roworder(v)}} efficiently reorders a data frame based on an ordering computed by \code{\link{radixorderv}}. %The source code for both \code{\link{radixorder}} and \code{\link{order(\dots, method = "radix")}, comes from \code{data.table:::forder}. %\code{\link{radixorder}} was modified to optionally return either a vector of group starts, a vector of group sizes, or both as an attribute, and also an attribute providing the size of the largest group and a logical statement on whether the input was already ordered. The function \code{\link{radixorderv}} exists as a programmers alternative. \item \code{\link{group}} provides fast grouping in first-appearance order of rows, based on a hashing algorithm in C. Objects have class 'qG', see below. \item \code{\link{GRP}} creates \emph{collapse} grouping objects of class 'GRP' based on \code{\link{radixorderv}} or \code{\link{group}}. 'GRP' objects form the central building block for grouped operations and programming in \emph{collapse} and are very efficient inputs to all \emph{collapse} functions supporting grouped operations. A 'GRP' object provides information about (1) the number of groups, (2) which rows belong to which group, (3) the group sizes, (4) the unique groups, (5) the variables used for grouping, (6) whether the grouping and initial inputs were ordered and (7) (optionally) the output from \code{\link{radixorder}} containing the ordering vector with group starts and maximum group size attributes. \item \code{\link{fgroup_by}} provides a fast replacement for \code{dplyr::group_by}, creating a grouped data frame (or data.table / tibble etc.) with a 'GRP' object attached. This grouped frame can be used for grouped operations using \emph{collapse}'s fast functions. % \emph{dplyr} functions will treat this tibble like an ordinary (non-grouped) one. \item \code{\link{funique}} is a faster version of \code{\link{unique}}. The data frame method also allows selecting unique rows according to a subset of the columns. \item \code{\link{qF}}, shorthand for 'quick-factor' implements very fast factor generation from atomic vectors using either radix ordering \code{method = "radix"} or hashing \code{method = "hash"}. Factors can also be used for efficient grouped programming with \emph{collapse} functions, especially if they are generated using \code{qF(x, na.exclude = FALSE)} which assigns a level to missing values and attaches a class 'na.included' ensuring that no additional missing value checks are executed by \emph{collapse} functions. \item \code{\link{qG}}, shorthand for 'quick-group', generates a kind of factor-light without the levels attribute but instead an attribute providing the number of levels. Optionally the levels / groups can be attached, but without converting them to character. Objects have a class 'qG', which is also recognized in the \emph{collapse} ecosystem. \item \code{\link{fdroplevels}} is a substantially faster replacement for \code{\link{droplevels}}. \item \code{\link{finteraction}} is a fast alternative to \code{\link{interaction}} implemented as a wrapper around \code{as_factor_GRP(GRP(\dots))}. It can be used to generate a factor from multiple vectors, factors or a list of vectors / factors. Unused factor levels are always dropped. \item \code{\link{groupid}} is a generalization of \code{data.table::rleid} providing a run-length type group-id from atomic vectors. It is generalization as it also supports passing an ordering vector and skipping missing values. For example \code{\link{qF}} and \code{\link{qG}} with \code{method = "radix"} are essentially implemented using \code{groupid(x, radixorder(x))}. \item \code{\link{seqid}} is a specialized function which creates a group-id from sequences of integer values. For any regular panel dataset \code{groupid(id, order(id, time))} and \code{seqid(time, order(id, time))} provide the same id variable. \code{\link{seqid}} is especially useful for identifying discontinuities in time-sequences. } } \section{Table of Functions}{ \tabular{lllll}{\emph{ Function / S3 Generic } \tab\tab \emph{ Methods } \tab\tab \emph{ Description } \cr \code{\link[=radixorder]{radixorder(v)}} \tab\tab No methods, for data frames and vectors \tab\tab Radix-based ordering + grouping information \cr \code{\link[=roworder]{roworder(v)}} \tab\tab No methods, for data frames \tab\tab Row sorting/reordering \cr \code{\link{group}} \tab\tab No methods, for data frames and vectors \tab\tab Hash-based grouping + grouping information \cr \code{\link{GRP}} \tab\tab \code{default, GRP, factor, qG, grouped_df, pseries, pdata.frame} \tab\tab Fast grouping and a flexible grouping object \cr \code{\link{fgroup_by}} \tab\tab No methods, for data frames \tab\tab Fast grouped data frame \cr \code{\link{funique}} \tab\tab \code{default, data.frame} \tab\tab Fast unique values/rows \cr \code{\link{qF}} \tab\tab No methods, for vectors \tab\tab Quick factor generation \cr \code{\link{qG}} \tab\tab No methods, for vectors \tab\tab Quick grouping of vectors and a 'factor-light' class \cr \code{\link{fdroplevels}} \tab\tab \code{factor, data.frame, list} \tab\tab Fast removal of unused factor levels \cr \code{\link{finteraction}} \tab\tab No methods, for data frames and vectors \tab\tab Fast interactions \cr \code{\link{groupid}} \tab\tab No methods, for vectors \tab\tab Run-length type group-id \cr \code{\link{seqid}} \tab\tab No methods, for vectors \tab\tab Run-length type integer sequence-id \cr } } \seealso{ \link[=collapse-documentation]{Collapse Overview}, \link[=fast-data-manipulation]{Data Frame Manipulation}, \link[=fast-statistical-functions]{Fast Statistical Functions} } \keyword{manip} \keyword{documentation} collapse/man/varying.Rd0000644000176200001440000001252714167161051014606 0ustar liggesusers\name{varying} \alias{varying} \alias{varying.default} \alias{varying.matrix} \alias{varying.data.frame} \alias{varying.pseries} \alias{varying.pdata.frame} \alias{varying.grouped_df} \alias{varying.sf} \title{Fast Check of Variation in Data} % Vectors, Matrix and Data Frame Columns} \description{ \code{varying} is a generic function that (column-wise) checks for variation in the values of \code{x}, (optionally) within the groups \code{g} (e.g. a panel-identifier). } \usage{ varying(x, ...) \method{varying}{default}(x, g = NULL, any_group = TRUE, use.g.names = TRUE, ...) \method{varying}{matrix}(x, g = NULL, any_group = TRUE, use.g.names = TRUE, drop = TRUE, ...) \method{varying}{data.frame}(x, by = NULL, cols = NULL, any_group = TRUE, use.g.names = TRUE, drop = TRUE, ...) # Methods for compatibility with plm: \method{varying}{pseries}(x, effect = 1L, any_group = TRUE, use.g.names = TRUE, ...) \method{varying}{pdata.frame}(x, effect = 1L, cols = NULL, any_group = TRUE, use.g.names = TRUE, drop = TRUE, ...) # Methods for grouped data frame / compatibility with dplyr: \method{varying}{grouped_df}(x, any_group = TRUE, use.g.names = FALSE, drop = TRUE, keep.group_vars = TRUE, ...) # Methods for grouped data frame / compatibility with sf: \method{varying}{sf}(x, by = NULL, cols = NULL, any_group = TRUE, use.g.names = TRUE, drop = TRUE, ...) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{x}{a vector, matrix, data.frame, panel series (\code{plm::pseries}), panel data frame (\code{plm::pdata.frame}) or grouped data frame (class 'grouped_df'). Data must not be numeric.} \item{g}{a factor, \code{GRP} object, atomic vector (internally converted to factor) or a list of vectors / factors (internally converted to a \code{GRP} object) used to group \code{x}.} \item{by}{same as \code{g}, but also allows one- or two-sided formulas i.e. \code{~ group1 + group2} or \code{var1 + var2 ~ group1 + group2}. See Examples} \item{any_group}{logical. If \code{!is.null(g)}, \code{FALSE} will check and report variation in all groups, whereas the default \code{TRUE} only checks if there is variation within any group. See Examples.} \item{cols}{select columns using column names, indices or a function (e.g. \code{is.numeric}). Two-sided formulas passed to \code{by} overwrite \code{cols}.} \item{use.g.names}{logical. Make group-names and add to the result as names (default method) or row-names (matrix and data frame methods). No row-names are generated for \emph{data.table}'s.} \item{drop}{\emph{matrix and data.frame methods:} Logical. \code{TRUE} drops dimensions and returns an atomic vector if the result is 1-dimensional.} \item{effect}{\emph{plm} methods: Select the panel identifier by which variation in the data should be examined. 1L takes the first variable in the \code{plm::index}, 2L the second etc.. Index variables can also be called by name. More than one index variable can be supplied, which will be interacted.} \item{keep.group_vars}{\emph{grouped_df method:} Logical. \code{FALSE} removes grouping variables after computation.} \item{...}{arguments to be passed to or from other methods.} } \details{ Without groups passed to \code{g}, \code{varying} simply checks if there is any variation in the columns of \code{x} and returns \code{TRUE} for each column where this is the case and \code{FALSE} otherwise. A set of data points is defined as varying if it contains at least 2 distinct non-missing values (such that a non-0 standard deviation can be computed on numeric data). \code{varying} checks for variation in both numeric and non-numeric data. If groups are supplied to \code{g} (or alternatively a \emph{grouped_df} to \code{x}), \code{varying} can operate in one of 2 modes: \itemize{ \item If \code{any_group = TRUE} (the default), \code{varying} checks each column for variation in any of the groups defined by \code{g}, and returns \code{TRUE} if such within-variation was detected and \code{FALSE} otherwise. Thus only one logical value is returned for each column and the computation on each column is terminated as soon as any variation within any group was found. \item If \code{any_group = FALSE}, \code{varying} runs through the entire data checking each group for variation and returns, for each column in \code{x}, a logical vector reporting the variation check for all groups. If a group contains only missing values, a \code{NA} is returned for that group. } The \emph{sf} method simply ignores the geometry column. } \value{ A logical vector or (if \code{!is.null(g)} and \code{any_group = FALSE}), a matrix or data frame of logical vectors indicating whether the data vary (over the dimension supplied by \code{g}). } \seealso{ \link[=summary-statistics]{Summary Statistics}, \link[=data-transformations]{Data Transformations}, \link[=collapse-documentation]{Collapse Overview} } \examples{ ## Checks overall variation in all columns varying(wlddev) ## Checks whether data are time-variant i.e. vary within country varying(wlddev, ~ country) ## Same as above but done for each country individually, countries without data are coded NA head(varying(wlddev, ~ country, any_group = FALSE)) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{manip} % use one of RShowDoc("KEYWORDS") collapse/man/arithmetic.Rd0000644000176200001440000001237414175334642015267 0ustar liggesusers\name{arithmetic} \alias{arithmetic} \alias{\%rr\%} \alias{\%r+\%} \alias{\%r-\%} \alias{\%r*\%} \alias{\%r/\%} \alias{\%cr\%} \alias{\%c+\%} \alias{\%c-\%} \alias{\%c*\%} \alias{\%c/\%} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Fast Row/Column Arithmetic for Matrix-Like Objects } \description{ Fast operators to perform row- or column-wise replacing and sweeping operations of vectors on matrices, data frames, lists. } \usage{ ## Perform the operation with v and each row of X X \%rr\% v # Replace rows of X with v X \%r+\% v # Add v to each row of X X \%r-\% v # Subtract v from each row of X X \%r*\% v # Multiply each row of X with v X \%r/\% v # Divide each row of X by v ## Perform a column-wise operation between V and X X \%cr\% V # Replace columns of X with V X \%c+\% V # Add V to columns of X X \%c-\% V # Subtract V from columns of X X \%c*\% V # Multiply columns of X with V X \%c/\% V # Divide columns of X by V } %- maybe also 'usage' for other objects documented here. \arguments{ \item{X}{a vector, matrix, data frame or list like object (with rows (r) columns (c) matching \code{v} / \code{V}).} \item{v}{for row operations: an atomic vector of matching \code{NCOL(X)}. If \code{X} is a data frame, \code{v} can also be a list of scalar atomic elements. It is also possible to sweep lists of vectors \code{v} out of lists of matrices or data frames \code{X}.} \item{V}{for column operations: a suitable scalar, vector, or matrix / data frame matching \code{NROW(X)}. \code{X} can also be a list of vectors / matrices in which case \code{V} can be a scalar / vector / matrix or matching list of scalars / vectors / matrices.} } \details{ With a matrix or data frame \code{X}, the default behavior of R when calling \code{X op v} (such as multiplication \code{X * v}) is to perform the operation of \code{v} with each column of \code{X}. The equivalent operation is performed by \code{X \%cop\% V}, with the difference that it computes significantly faster if \code{X}/\code{V} is a data frame / list. A more complex but frequently required task is to perform an operation with \code{v} on each row of \code{X}. This is provided based on efficient C++ code by the \code{\%rop\%} set of functions, e.g. \code{X \%r*\% v} efficiently multiplies \code{v} to each row of \code{X}. } \value{ \code{X} where the operation with \code{v} / \code{V} was performed on each row or column. All attributes of \code{X} are preserved. } \note{ \emph{Computations and Output:} These functions are all quite simple, they only work with \code{X} on the LHS i.e. \code{v \%op\% X} will likely fail. The row operations are simple wrappers around \code{\link{TRA}} which provides more operations including grouped replacing and sweeping (where \code{v} would be a matrix or data frame with less rows than \code{X} being mapped to the rows of \code{X} by grouping vectors). One consequence is that just like \code{\link{TRA}}, row-wise mathematical operations (+, -, *, /) always yield numeric output, even if both \code{X} and \code{v} may be integer. This is different for column- operations which depend on base R and may also preserve integer data. \emph{Rules of Arithmetic:} Since these operators are defined as simple infix functions, the normal rules of arithmetic are not respected. So \code{a \%c+\% b \%c*\% c} evaluates as \code{(a \%c+\% b) \%c*\% c}. As with all chained infix operations, they are just evaluated sequentially from left to right. \emph{Performance Notes:} The function \code{\link{setop}} and a related set of \code{\%op=\%} operators can be used to perform these operations by reference, and are faster if copies of the output are not required!! Furthermore, for Fast Statistical Functions, using \code{fmedian(X, TRA = "-")} will be a tiny bit faster than \code{X \%r-\% fmedian(X)}. Also use \code{fwithin(X)} for fast centering using the mean, and \code{fscale(X)} for fast scaling and centering or mean-preserving scaling. } %% ~Make other sections like Warning with \section{Warning }{....} ~ \seealso{ \code{\link{setop}}, \code{\link{TRA}}, \code{\link{dapply}}, \link[=efficient-programming]{Efficient Programming}, \link[=data-transformations]{Data Transformations}, \link[=collapse-documentation]{Collapse Overview} } \examples{ ## Using data frame's / lists v <- mtcars$cyl mtcars \%cr\% v mtcars \%c-\% v mtcars \%r-\% seq_col(mtcars) mtcars \%r-\% lapply(mtcars, quantile, 0.28) mtcars \%c*\% 5 # Significantly faster than mtcars * 5 mtcars \%c*\% mtcars # Significantly faster than mtcars * mtcars ## Using matrices X <- qM(mtcars) X \%cr\% v X \%c-\% v X \%r-\% dapply(X, quantile, 0.28) \donttest{ ## Chained Operations library(magrittr) # Note: Used because |> is not available on older R versions mtcars \%>\% fwithin() \%r-\% rnorm(11) \%c*\% 5 \%>\% tfm(mpg = fsum(mpg)) \%>\% qsu() } } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory (show via RShowDoc("KEYWORDS")): \keyword{manip} \keyword{math} % \keyword{ ~kwd2 } % Use only one keyword per line. % For non-standard keywords, use \concept instead of \keyword: % \concept{ ~cpt1 } % \concept{ ~cpt2 } % Use only one concept per line. collapse/man/TRA.Rd0000644000176200001440000002240614167156464013566 0ustar liggesusers\name{TRA} \alias{TRA} \alias{TRA.default} \alias{TRA.matrix} \alias{TRA.data.frame} \alias{TRA.grouped_df} \title{ Transform Data by (Grouped) Replacing or Sweeping out Statistics } \description{ \code{TRA} is an S3 generic that efficiently transforms data by either (column-wise) replacing data values with supplied statistics or sweeping the statistics out of the data. \code{TRA} supports grouped sweeping and replacing operations, and is thus a generalization of \code{\link{sweep}}. } \usage{ TRA(x, STATS, FUN = "-", ...) \method{TRA}{default}(x, STATS, FUN = "-", g = NULL, ...) \method{TRA}{matrix}(x, STATS, FUN = "-", g = NULL, ...) \method{TRA}{data.frame}(x, STATS, FUN = "-", g = NULL, ...) \method{TRA}{grouped_df}(x, STATS, FUN = "-", keep.group_vars = TRUE, ...) } \arguments{ \item{x}{a atomic vector, matrix, data frame or grouped data frame (class 'grouped_df').} \item{STATS}{a matching set of summary statistics. See Details and Examples.} \item{FUN}{an integer or character string indicating the operation to perform. There are 10 supported operations: \tabular{lllll}{\emph{ Int. } \tab\tab \emph{ String } \tab\tab \emph{ Description } \cr 1 \tab\tab "replace_fill" \tab\tab replace and overwrite missing values in \code{x} \cr 2 \tab\tab "replace" \tab\tab replace but preserve missing values in \code{x} \cr 3 \tab\tab "-" \tab\tab subtract (i.e. center) \cr 4 \tab\tab "-+" \tab\tab subtract group-statistics but add group-frequency weighted average of group statistics (i.e. center on overall average statistic) \cr 5 \tab\tab "/" \tab\tab divide (i.e. scale. For mean-preserving scaling see also \code{\link{fscale}}) \cr 6 \tab\tab "\%" \tab\tab compute percentages (i.e. divide and multiply by 100) \cr 7 \tab\tab "+" \tab\tab add \cr 8 \tab\tab "*" \tab\tab multiply \cr 9 \tab\tab "\%\%" \tab\tab modulus (i.e. remainder from division by \code{STATS}) \cr 10 \tab\tab "-\%\%" \tab\tab subtract modulus (i.e. floor data by \code{STATS}) } } \item{g}{a factor, \code{\link{GRP}} object, atomic vector (internally converted to factor) or a list of vectors / factors (internally converted to a \code{\link{GRP}} object) used to group \code{x}. Number of groups must match rows of \code{STATS}. See Details.} \item{keep.group_vars}{\emph{grouped_df method:} Logical. \code{FALSE} removes grouping variables after computation. See Details and Examples.} \item{...}{arguments to be passed to or from other methods.} } \details{ Without groups (\code{g = NULL}), \code{TRA} is nothing more than a column based version of \code{\link{sweep}}, albeit 4-times more efficient on matrices and many times more efficient on data frames. In this case all methods support an atomic vector of statistics of length \code{NCOL(x)} passed to \code{STATS}. The matrix and data frame methods also support a 1-row matrix or 1-row data frame / list, respectively. \code{TRA} always preserves all attributes of \code{x}. With groups passed to \code{g}, \code{STATS} needs to be of the same type as \code{x} and of appropriate dimensions [such that \code{NCOL(x) == NCOL(STATS)} and \code{NROW(STATS)} equals the number of groups (i.e. the number of levels if \code{g} is a factor)]. If this condition is satisfied, \code{TRA} will assume that the first row of \code{STATS} is the set of statistics computed on the first group/level of \code{g}, the second row on the second group/level etc. and do groupwise replacing or sweeping out accordingly. For example Let \code{x = c(1.2, 4.6, 2.5, 9.1, 8.7, 3.3)}, g is an integer vector in 3 groups \code{g = c(1,3,3,2,1,2)} and \code{STATS = fmean(x,g) = c(4.95, 6.20, 3.55)}. Then \code{out = TRA(x,STATS,"-",g) = c(-3.75, 1.05, -1.05, 2.90, 3.75, -2.90)} [same as \code{fmean(x, g, TRA = "-")}] does the equivalent of the following for-loop: \code{for(i in 1:6) out[i] = x[i] - STATS[g[i]]}. Correct computation requires that \code{g} as used in \code{fmean} and \code{g} passed to \code{TRA} are exactly the same vector. Using \code{g = c(1,3,3,2,1,2)} for \code{fmean} and \code{g = c(3,1,1,2,3,2)} for \code{TRA} will not give the right result. The safest way of programming with \code{TRA} is thus to repeatedly employ the same factor or \code{\link{GRP}} object for all grouped computations. Atomic vectors passed to \code{g} will be converted to factors (see \code{\link{qF}}) and lists will be converted to \code{\link{GRP}} objects. This is also done by all \link[=fast-statistical-functions]{Fast Statistical Functions} and by default by \code{\link{BY}}, thus together with these functions, \code{TRA} can also safely be used with atomic- or list-groups. Problems may arise if functions from other packages internally group atomic vectors or lists in a non-sorted way. [\emph{Note}: \code{as.factor} conversions are ok as this also involves sorting.] %In contrast to the other methods, \code{TRA.grouped_df} matches column names exactly, thus \code{STATS} can be any subset of aggregated columns in \code{x} in any order, with or without grouping columns. \code{TRA.grouped_df} will transform the columns in \code{x} with their aggregated versions matched from \code{STATS} (ignoring grouping columns found in \code{x} or \code{STATS} and columns in \code{x} not found in \code{STATS}), and return \code{x} again. If \code{x} is a grouped data frame ('grouped_df'), \code{TRA} matches the columns of \code{x} and \code{STATS} and also checks for grouping columns in \code{x} and \code{STATS}. \code{TRA.grouped_df} will then only transform those columns in \code{x} for which matching counterparts were found in \code{STATS} (exempting grouping columns) and return \code{x} again (with columns in the same order). If \code{keep.group_vars = FALSE}, the grouping columns are dropped after computation, however the "groups" attribute is not dropped (it can be removed using \code{\link[=fungroup]{fungroup()}} or \code{dplyr::ungroup()}). } \value{ \code{x} with columns replaced or swept out using \code{STATS}, (optionally) grouped by \code{g}. } \note{ In most cases there is no need to call the \code{TRA()} function, because of the TRA-argument to all \link[=fast-statistical-functions]{Fast Statistical Functions} (ensuring that the exact same grouping vector is used for computing statistics and subsequent transformation). In addition the functions \code{\link[=fbetween]{fbetween/B}} and \code{\link[=fwithin]{fwithin/W}} and \code{\link[=fscale]{fscale/STD}} provide optimized solutions for frequent scaling, centering and averaging tasks. %\code{TRA} is really a programmers function for cases when both aggregate statistics and transformed data need to be retained, or to work with more complex statistics (i.e. together with \code{\link{dapply}} or \code{\link{BY}}). } \seealso{ \code{\link{sweep}}, \link[=fast-statistical-functions]{Fast Statistical Functions}, \link[=data-transformations]{Data Transformations}, \link[=collapse-documentation]{Collapse Overview} } \examples{ v <- iris$Sepal.Length # A numeric vector f <- iris$Species # A factor dat <- num_vars(iris) # Numeric columns m <- qM(dat) # Matrix of numeric data head(TRA(v, fmean(v))) # Simple centering [same as fmean(v, TRA = "-") or W(v)] head(TRA(m, fmean(m))) # [same as sweep(m, 2, fmean(m)), fmean(m, TRA = "-") or W(m)] head(TRA(dat, fmean(dat))) # [same as fmean(dat, TRA = "-") or W(dat)] head(TRA(v, fmean(v), "replace")) # Simple replacing [same as fmean(v, TRA = "replace") or B(v)] head(TRA(m, fmean(m), "replace")) # [same as sweep(m, 2, fmean(m)), fmean(m, TRA = 1L) or B(m)] head(TRA(dat, fmean(dat), "replace")) # [same as fmean(dat, TRA = "replace") or B(dat)] head(TRA(m, fsd(m), "/")) # Simple scaling... [same as fsd(m, TRA = "/")]... # Note: All grouped examples also apply for v and dat... head(TRA(m, fmean(m, f), "-", f)) # Centering [same as fmean(m, f, TRA = "-") or W(m, f)] head(TRA(m, fmean(m, f), "replace", f)) # Replacing [same fmean(m, f, TRA = "replace") or B(m, f)] head(TRA(m, fsd(m, f), "/", f)) # Scaling [same as fsd(m, f, TRA = "/")] head(TRA(m, fmean(m, f), "-+", f)) # Centering on the overall mean ... # [same as fmean(m, f, TRA = "-+") or # W(m, f, mean = "overall.mean")] head(TRA(TRA(m, fmean(m, f), "-", f), # Also the same thing done manually !! fmean(m), "+")) \donttest{ % No code relying on suggested package # grouped tibble method library(dplyr) iris \%>\% group_by(Species) \%>\% TRA(fmean(.)) iris \%>\% group_by(Species) \%>\% fmean(TRA = "-") # Same thing iris \%>\% group_by(Species) \%>\% TRA(fmean(.)[c(2,4)]) # Only transforming 2 columns iris \%>\% group_by(Species) \%>\% TRA(fmean(.)[c(2,4)], # Dropping species column keep.group_vars = FALSE) iris \%>\% fgroup_by(Species) \%>\% TRA(fmean(.)) # Faster collapse grouping... } } % Add one or more standard keywords, see file 'KEYWORDS' in the R documentation directory. \keyword{manip} % __ONLY ONE__ keyword per line collapse/man/fmedian.Rd0000644000176200001440000001563014175334642014537 0ustar liggesusers\name{fmedian} \alias{fmedian} \alias{fmedian.default} \alias{fmedian.matrix} \alias{fmedian.data.frame} \alias{fmedian.grouped_df} \title{Fast (Grouped, Weighted) Median Value for Matrix-Like Objects} % Vectors, Matrix and Data Frame Columns} \description{ \code{fmedian} is a generic function that computes the (column-wise) median value of all values in \code{x}, (optionally) grouped by \code{g} and/or weighted by \code{w}. The \code{\link{TRA}} argument can further be used to transform \code{x} using its (grouped, weighted) median value. } \usage{ fmedian(x, \dots) \method{fmedian}{default}(x, g = NULL, w = NULL, TRA = NULL, na.rm = TRUE, use.g.names = TRUE, \dots) \method{fmedian}{matrix}(x, g = NULL, w = NULL, TRA = NULL, na.rm = TRUE, use.g.names = TRUE, drop = TRUE, \dots) \method{fmedian}{data.frame}(x, g = NULL, w = NULL, TRA = NULL, na.rm = TRUE, use.g.names = TRUE, drop = TRUE, \dots) \method{fmedian}{grouped_df}(x, w = NULL, TRA = NULL, na.rm = TRUE, use.g.names = FALSE, keep.group_vars = TRUE, keep.w = TRUE, \dots) } \arguments{ \item{x}{a numeric vector, matrix, data frame or grouped data frame (class 'grouped_df').} \item{g}{a factor, \code{\link{GRP}} object, atomic vector (internally converted to factor) or a list of vectors / factors (internally converted to a \code{\link{GRP}} object) used to group \code{x}.} \item{w}{a numeric vector of (non-negative) weights, may contain missing values, but only if \code{x} is also missing. } \item{TRA}{an integer or quoted operator indicating the transformation to perform: 1 - "replace_fill" | 2 - "replace" | 3 - "-" | 4 - "-+" | 5 - "/" | 6 - "\%" | 7 - "+" | 8 - "*" | 9 - "\%\%" | 10 - "-\%\%". See \code{\link{TRA}}.} \item{na.rm}{logical. Skip missing values in \code{x}. Defaults to \code{TRUE} and implemented at very little computational cost. If \code{na.rm = FALSE} a \code{NA} is returned when encountered.} \item{use.g.names}{logical. Make group-names and add to the result as names (default method) or row-names (matrix and data frame methods). No row-names are generated for \emph{data.table}'s.} \item{drop}{\emph{matrix and data.frame method:} Logical. \code{TRUE} drops dimensions and returns an atomic vector if \code{g = NULL} and \code{TRA = NULL}.} \item{keep.group_vars}{\emph{grouped_df method:} Logical. \code{FALSE} removes grouping variables after computation.} \item{keep.w}{\emph{grouped_df method:} Logical. Retain summed weighting variable after computation (if contained in \code{grouped_df}).} \item{\dots}{arguments to be passed to or from other methods.} } \details{ Median value estimation is done using \code{std::nth_element} in C++, which is an efficient partial sorting algorithm. A downside of this is that vectors need to be copied first and then partially sorted, thus \code{fmedian} currently requires additional memory equal to the size of the vector (\code{x} or a column of \code{x}). Grouped computations are currently performed by mapping the data to a sparse-array and then partially sorting each row (group) of that array. Because of compiler optimizations this requires less memory than a full deep copy done with no groups. % For reasons I don't fully understand this requires less memory than a full deep copy which is done with no groups. The weighted median is defined as the element \code{k} from a set of sorted elements, such that the sum of weights of all elements larger and all elements smaller than k is \code{<= sum(w)/2}. If the half-sum of weights (\code{sum(w)/2}) is reached exactly for some element k, then (summing from the lower end) both k and k+1 would qualify as the weighted median (and some possible additional elements with zero weights following k would also qualify). \code{fmedian} solves these ties by taking a simple arithmetic mean of all elements qualifying as the weighted median. The weighted median is computed using \code{\link{radixorder}} to first obtain an ordering of all elements, so it is considerably more computationally expensive than the unweighted version. With groups, the entire vector is also ordered, and the weighted median is computed in a single ordered pass through the data (after group-summing the weights, skipping weights for which \code{x} is missing). If \code{x} is a matrix or data frame, these computations are performed independently for each column. When applied to data frames with groups or \code{drop = FALSE}, \code{fmedian} preserves all column attributes (such as variable labels) but does not distinguish between classed and unclassed objects. The attributes of the data frame itself are also preserved. %smaller to the left and to the right of %With weights, %a lower weighted median is calculated using a fast radix-sort of the values in each column. The lower weighted median is defined as the smallest element \code{sort(x)[i]} for which \code{sum(w[order(x)][-seq_len(i)]) <= sum(w)/2}. } \value{ The (\code{w} weighted) median value of \code{x}, grouped by \code{g}, or (if \code{\link{TRA}} is used) \code{x} transformed by its median, grouped by \code{g}. } \seealso{ \code{\link{fnth}}, \code{\link{fmean}}, \code{\link{fmode}}, \link[=fast-statistical-functions]{Fast Statistical Functions}, \link[=collapse-documentation]{Collapse Overview} } \examples{ ## default vector method mpg <- mtcars$mpg fmedian(mpg) # Simple median value fmedian(mpg, w = mtcars$hp) # Weighted median: Weighted by hp fmedian(mpg, TRA = "-") # Simple transformation: Subtract median value fmedian(mpg, mtcars$cyl) # Grouped median value fmedian(mpg, mtcars[c(2,8:9)]) # More groups.. g <- GRP(mtcars, ~ cyl + vs + am) # Precomputing groups gives more speed ! fmedian(mpg, g) fmedian(mpg, g, mtcars$hp) # Grouped weighted median fmedian(mpg, g, TRA = "-") # Groupwise subtract median value fmedian(mpg, g, mtcars$hp, "-") # Groupwise subtract weighted median value ## data.frame method fmedian(mtcars) head(fmedian(mtcars, TRA = "-")) fmedian(mtcars, g) fmedian(fgroup_by(mtcars, cyl, vs, am)) # Another way of doing it.. fmedian(mtcars, g, use.g.names = FALSE) # No row-names generated ## matrix method m <- qM(mtcars) fmedian(m) head(fmedian(m, TRA = "-")) fmedian(m, g) # etc.. \donttest{ % No code relying on suggested package library(dplyr) # grouped_df method mtcars \%>\% group_by(cyl,vs,am) \%>\% fmedian() mtcars \%>\% group_by(cyl,vs,am) \%>\% fmedian(hp) # Weighted mtcars \%>\% fgroup_by(cyl,vs,am) \%>\% fmedian() # Faster grouping! mtcars \%>\% fgroup_by(cyl,vs,am) \%>\% fmedian(TRA = "-") # De-median mtcars \%>\% fgroup_by(cyl,vs,am) \%>\% fselect(mpg, hp) \%>\% # Faster selecting fmedian(hp, "-") # Weighted de-median mpg, using hp as weights } } \keyword{univar} \keyword{manip} collapse/man/fast-data-manipulation.Rd0000644000176200001440000001362314170153606017470 0ustar liggesusers\name{fast-data-manipulation} \alias{A3-fast-data-manipulation} \alias{fast-data-manipulation} \title{Fast Data Manipulation} \description{ \emph{collapse} provides the following functions for fast manipulation of (mostly) data frames. \itemize{ \item \code{\link{fselect}} is a much faster alternative to \code{dplyr::select} to select columns using expressions involving column names. \code{\link{get_vars}} is a more versatile and programmer friendly function to efficiently select and replace columns by names, indices, logical vectors, regular expressions or using functions to identify columns. \item The functions \code{\link{num_vars}}, \code{\link{cat_vars}}, \code{\link{char_vars}}, \code{\link{fact_vars}}, \code{\link{logi_vars}} and \code{\link{date_vars}} are convenience functions to efficiently select and replace columns by data type. \item \code{\link{add_vars}} efficiently adds new columns at any position within a data frame (default at the end). This can be done vie replacement (i.e. \code{add_vars(data) <- newdata}) or returning the appended data (i.e. \code{add_vars(data, newdata1, newdata2, \dots)}). Because of the latter, \code{add_vars} is also a more efficient alternative to \code{cbind.data.frame}. \item \code{\link{fsubset}} is a much faster version of \code{\link{subset}} to efficiently subset vectors, matrices and data frames. If the non-standard evaluation offered by \code{\link{fsubset}} is not needed, the function \code{\link{ss}} is a much faster and also more secure alternative to \code{[.data.frame}. \item \code{\link{fsummarise}} is a much faster version of \code{dplyr::summarise} when used together with the \link[=fast-statistical-functions]{Fast Statistical Functions} and \code{\link{fgroup_by}}, with whom it also supports super fast weighted aggregation. \item \code{\link{fmutate}} is a much faster version of \code{dplyr::mutate} when used together with the \link[=fast-statistical-functions]{Fast Statistical Functions} as well as fast \link[=data-transformations]{Data Transformation Functions} and \code{\link{fgroup_by}}. \item \code{\link{ftransform}} is a much faster version of \code{\link{transform}}, which also supports list input and nested pipelines. \code{\link{settransform}} does all of that by reference, i.e. it modifies the data frame in the global environment. \code{\link{fcompute}} is similar to \code{\link{ftransform}} but only returns modified and computed columns in a new data frame. %As a new feature, it is now possible to bulk-process columns with \code{\link{ftransform}}, i.e. \code{ftransform(data, fscale(data[1:2]))} is the same as \code{ftransform(data, col1 = fscale(col1), col2 = fscale(col2))}, and \code{ftransform(data) <- fscale(data[1:2]))} or \code{settransform(data, fscale(data[1:2]))} are both equivalent to \code{data[1:2] <- fscale(data[1:2]))}. Non-matching columns are added to the data.frame. \item \code{\link{roworder}} is a fast substitute for \code{dplyr::arrange}, but the syntax is inspired by \code{data.table::setorder}. \item \code{\link{colorder}} efficiently reorders columns in a data frame, see also \code{data.table::setcolorder}. \item \code{\link{frename}} is a fast substitute for \code{dplyr::rename}, to efficiently rename various objects. \code{\link{setrename}} renames objects by reference. \code{\link{relabel}} and \code{\link{setrelabel}} do the same thing for variable labels (see also \code{\link{vlabels}}). } } \section{Table of Functions}{ \tabular{lllll}{\emph{ Function / S3 Generic } \tab\tab \emph{ Methods } \tab\tab \emph{ Description } \cr \code{\link[=fselect]{fselect(<-)}} \tab\tab No methods, for data frames \tab\tab Fast select or replace columns (non-standard evaluation) \cr \code{\link[=get_vars]{get_vars(<-)}}, \code{\link[=num_vars]{num_vars(<-)}}, \code{\link[=cat_vars]{cat_vars(<-)}}, \code{\link[=char_vars]{char_vars(<-)}}, \code{\link[=fact_vars]{fact_vars(<-)}}, \code{\link[=logi_vars]{logi_vars(<-)}}, \code{\link[=date_vars]{date_vars(<-)}} \tab\tab No methods, for data frames \tab\tab Fast select or replace columns \cr \code{\link[=add_vars]{add_vars(<-)}} \tab\tab No methods, for data frames \tab\tab Fast add columns \cr \code{\link{fsubset}} \tab\tab \code{default, matrix, data.frame} \tab\tab Fast subset data (non-standard evaluation) \cr \code{\link{ss}} \tab\tab No methods, for data frames \tab\tab Fast subset data frames \cr \code{\link{fsummarise}} \tab\tab No methods, for data frames \tab\tab Fast data aggregation \cr \code{\link{fmutate}}, \code{\link[=ftransform]{(f/set)ftransform(<-)}} \tab\tab No methods, for data frames \tab\tab Compute, modify or delete columns (non-standard evaluation) \cr %\code{\link{settransform}} \tab\tab No methods, for data frames \tab\tab Compute, modify or delete columns by reference (non-standard evaluation) \cr \code{\link[=fcompute]{fcompute(v)}} \tab\tab No methods, for data frames \tab\tab Compute or modify columns, returned in a new data frame (non-standard evaluation) \cr \code{\link[=roworder]{roworder(v)}} \tab\tab No methods, for data frames \tab\tab Reorder rows and return data frame (standard and non-standard evaluation) \cr \code{\link[=colorder]{colorder(v)}} \tab\tab No methods, for data frames \tab\tab Reorder columns and return data frame (standard and non-standard evaluation) \cr \code{\link[=frename]{(f/set)rename}}, \code{\link[=frename]{(set)relabel}} \tab\tab No methods, for all objects with 'names' attribute \tab\tab Rename and return object / relabel columns in a data frame. \cr } } \seealso{ \link[=collapse-documentation]{Collapse Overview}, \link[=quick-conversion]{Quick Data Conversion}, \link[=recode-replace]{Recode and Replace Values} } \keyword{manip} \keyword{documentation} collapse/man/psmat.Rd0000644000176200001440000001471614167157030014257 0ustar liggesusers\name{psmat} \alias{psmat} \alias{psmat.default} \alias{psmat.pseries} \alias{psmat.data.frame} \alias{psmat.pdata.frame} \alias{plot.psmat} \alias{aperm.psmat} \alias{[.psmat} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Matrix / Array from Panel Series } \description{ \code{psmat} efficiently expands a panel-vector or \code{plm::pseries} into a matrix. If a data frame or \code{plm::pdata.frame} is passed, \code{psmat} returns (default) a 3D array or a list of matrices. % By default the matrix is created such that group-identifiers constitute the rows and time the columns. } \usage{ psmat(x, \dots) \method{psmat}{default}(x, g, t = NULL, transpose = FALSE, \dots) \method{psmat}{pseries}(x, transpose = FALSE, \dots) \method{psmat}{data.frame}(x, by, t = NULL, cols = NULL, transpose = FALSE, array = TRUE, \dots) \method{psmat}{pdata.frame}(x, cols = NULL, transpose = FALSE, array = TRUE, \dots) \method{plot}{psmat}(x, legend = FALSE, colours = legend, labs = NULL, grid = FALSE, \dots) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{x}{a vector, panel series (\code{plm::pseries}), data frame or panel data frame (\code{plm::pdata.frame}).} \item{g}{a factor, \code{GRP} object, atomic vector (internally converted to factor) or a list of vectors / factors (internally converted to a \code{GRP} object) used to group \code{x}. If the panel is balanced an integer indicating the number of groups can also be supplied. See Examples.} \item{by}{\emph{data.frame method}: Same input as \code{g}, but also allows one- or two-sided formulas using the variables in \code{x}, i.e. \code{~ idvar} or \code{var1 + var2 ~ idvar1 + idvar2}.} \item{t}{same inputs as \code{g}, to indicate the time-variable(s) or second identifier(s). \code{g} and \code{t} together should fully identify the panel. If \code{t = NULL}, the data is assumed sorted and \code{seq_col} is used to generate rownames for the output matrix.} \item{cols}{\emph{data.frame method}: Select columns using a function, column names, indices or a logical vector. \emph{Note}: \code{cols} is ignored if a two-sided formula is passed to \code{by}.} \item{transpose}{logical. \code{TRUE} generates the matrix such that \code{g/by -> columns, t -> rows}. Default is \code{g/by -> rows, t -> columns}.} \item{array}{\emph{data.frame / pdata.frame methods}: logical. \code{TRUE} returns a 3D array (if just one column is selected a matrix is returned). \code{FALSE} returns a list of matrices.} \item{\dots}{arguments to be passed to or from other methods, or for the plot method additional arguments passed to \code{ts.plot}.} \item{legend}{logical. Automatically create a legend of panel-groups.} \item{colours}{either \code{TRUE} to automatically colour by panel-groups using \code{\link{rainbow}} or a character vector of colours matching the number of panel-groups (series).} \item{labs}{character. Provide a character-vector of variable labels / series titles when plotting an array.} \item{grid}{logical. Calls \code{\link{grid}} to draw gridlines on the plot.} } \details{ For \code{plm::pseries}, the first \code{index} variable is taken to be the group-id and the second the time variable. If more than 2 index variables are attached to \code{plm::pseries}, the last one is taken as the time variable and the others are taken as group-id's and interacted. } \value{ A matrix or 3D array containing the data in \code{x}, where by default the rows constitute the groups-ids (\code{g/by}) and the columns the time variable or individual ids (\code{t}). 3D arrays contain the variables in the 3rd dimension. The objects have a class 'psmat', and also a 'transpose' attribute indicating whether \code{transpose = TRUE}. } % \references{ %% ~put references to the literature/web site here ~ % } % \author{ %% ~~who you are~~ % } \note{ The \code{pdata.frame} method only works for properly subsetted objects of class 'pdata.frame'. A list of 'pseries' won't work. There also exist simple \code{aperm} and \code{[} (subset) methods for 'psmat' objects. These differ from the default methods only by keeping the class and the 'transpose' attribute. } %% ~Make other sections like Warning with \section{Warning }{\dots.} ~ \seealso{ \link[=time-series-panel-series]{Time Series and Panel Series}, \link[=collapse-documentation]{Collapse Overview} %% ~~objects to See Also as \code{\link{help}}, ~~~ } \examples{ ## World Development Panel Data head(wlddev) # View data qsu(wlddev, pid = ~ iso3c, cols = 9:12, vlabels = TRUE) # Sumarizing data str(psmat(wlddev$PCGDP, wlddev$iso3c, wlddev$year)) # Generating matrix of GDP r <- psmat(wlddev, PCGDP ~ iso3c, ~ year) # Same thing using data.frame method plot(r, main = vlabels(wlddev)[9], xlab = "Year") # Plot the matrix str(r) # See srructure str(psmat(wlddev$PCGDP, wlddev$iso3c)) # The Data is sorted, could omit t str(psmat(wlddev$PCGDP, 216)) # This panel is also balanced, so # ..indicating the number of groups would be sufficient to obtain a matrix ar <- psmat(wlddev, ~ iso3c, ~ year, 9:12) # Get array of transposed matrices str(ar) plot(ar) plot(ar, legend = TRUE) plot(psmat(collap(wlddev, ~region+year, cols = 9:12), # More legible and fancy plot ~region, ~year), legend = TRUE, labs = vlabels(wlddev)[9:12]) psml <- psmat(wlddev, ~ iso3c, ~ year, 9:12, array = FALSE) # This gives list of ps-matrices head(unlist2d(psml, "Variable", "Country", id.factor = TRUE),2) # Using unlist2d, can generate DF \donttest{ % No code relying on suggested package ## Using plm simplifies things pwlddev <- plm::pdata.frame(wlddev, index = c("iso3c","year")) # Creating a Panel Data Frame PCGDP <- pwlddev$PCGDP # A panel-Series of GDP per Capita head(psmat(PCGDP), 2) # Same as above, more parsimonious plot(psmat(PCGDP)) plot(psmat(pwlddev[9:12])) plot(psmat(G(pwlddev[9:12]))) # Here plotting panel- growth rates } } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{manip} % __ONLY ONE__ keyword per line % use one of RShowDoc("KEYWORDS") \keyword{array} \keyword{ts} collapse/man/pad.Rd0000644000176200001440000000765414167161352013704 0ustar liggesusers\name{pad} \alias{pad} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Pad Matrix-Like Objects with a Value } \description{ The \code{pad} function inserts elements / rows filled with \code{value} into a vector matrix or data frame \code{X} at positions given by \code{i}. It is particularly useful to expand objects returned by statistical procedures which remove missing values to the original data dimensions. } \usage{ pad(X, i, value = NA, method = c("auto", "xpos", "vpos")) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{X}{ a vector, matrix, data frame or list of equal-length columns. } \item{i}{ either an integer (positive or negative) or logical vector giving positions / rows of \code{X} into which \code{value}'s should be inserted, or, alternatively, a positive integer vector with \code{length(i) == NROW(X)}, but with some gaps in the indices into which \code{value}'s can be inserted, or a logical vector with \code{sum(i) == NROW(X)} such that \code{value}'s can be inserted for \code{FALSE} values in the logical vector. See also \code{method} and Examples. } \item{value}{ a scalar value to be replicated and inserted into \code{X} at positions / rows given by \code{i}. Default is \code{NA}. } \item{method}{ an integer or string specifying what the use of \code{i}. The options are: \tabular{lllll}{\emph{ Int. } \tab\tab \emph{ String } \tab\tab \emph{ Description } \cr 1 \tab\tab "auto" \tab\tab automatic method selection: If \code{i} is positive integer and \code{length(i) == NROW(X)} or if \code{i} is logical and \code{sum(i) == NROW(X)}, choose method "xpos", else choose "vpos". \cr\cr\cr 1 \tab\tab "xpos" \tab\tab \code{i} is a vector of positive integers or a logical vector giving the positions of the the elements / rows of \code{X}. \code{values}'s are inserted where there are gaps / \code{FALSE} values in \code{i}. \cr\cr\cr 2 \tab\tab "vpos" \tab\tab \code{i} is a vector of positive / negative integers or a logical vector giving the positions at which \code{values}'s / rows should be inserted into \code{X}. } } } \value{ \code{X} with elements / rows filled with \code{value} inserted at positions given by \code{i}. } \seealso{ \code{\link{append}}, \link[=recode-replace]{Recode and Replace Values}, \link[=small-helpers]{Small (Helper) Functions}, \link[=collapse-documentation]{Collapse Overview} } \examples{ v <- 1:3 pad(v, 1:2) # Automatic selection of method "vpos" pad(v, -(1:2)) # Same thing pad(v, c(TRUE, TRUE, FALSE, FALSE, FALSE)) # Same thing pad(v, c(1, 3:4)) # Automatic selection of method "xpos" pad(v, c(TRUE, FALSE, TRUE, TRUE, FALSE)) # Same thing head(pad(wlddev, 1:3)) # Insert 3 missing rows at the beginning of the data head(pad(wlddev, 2:4)) # ... at rows positions 2-4 # pad() is mostly useful for statistical models which only use the complete cases: mod <- lm(LIFEEX ~ PCGDP, wlddev) # Generating a residual column in the original data (automatic selection of method "vpos") settfm(wlddev, resid = pad(resid(mod), mod$na.action)) # Another way to do it: r <- resid(mod) i <- as.integer(names(r)) resid2 <- pad(r, i) # automatic selection of method "xpos" # here we need to add some elements as flast(i) < nrow(wlddev) resid2 <- c(resid2, rep(NA, nrow(wlddev)-length(resid2))) # See that these are identical: identical(unattrib(wlddev$resid), resid2) # Can also easily get a model matrix at the dimensions of the original data mm <- pad(model.matrix(mod), mod$na.action) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory (show via RShowDoc("KEYWORDS")): \keyword{manip} % \keyword{ ~kwd2 } % Use only one keyword per line. % For non-standard keywords, use \concept instead of \keyword: % \concept{ ~cpt1 } % \concept{ ~cpt2 } % Use only one concept per line. collapse/man/dapply.Rd0000644000176200001440000001352214167156464014430 0ustar liggesusers\name{dapply} \alias{dapply} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Data Apply } \description{ \code{dapply} efficiently applies functions to columns or rows of matrix-like objects and by default returns an object of the same type and with the same attributes. Alternatively it is possible to return the result in a plain matrix or data.frame. A simple parallelism is also available. } \usage{ dapply(X, FUN, \dots, MARGIN = 2, parallel = FALSE, mc.cores = 1L, return = c("same", "matrix", "data.frame"), drop = TRUE) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{X}{a matrix, data frame or alike object.} \item{FUN}{a function, can be scalar- or vector-valued.} \item{\dots}{further arguments to \code{FUN}.} \item{MARGIN}{integer. The margin which \code{FUN} will be applied over. Default \code{2} indicates columns while \code{1} indicates rows. See also Details. } \item{parallel}{logical. \code{TRUE} implements simple parallel execution by internally calling \code{\link{mclapply}} instead of \code{\link{lapply}}.} \item{mc.cores}{integer. Argument to \code{\link{mclapply}} indicating the number of cores to use for parallel execution. Can use \code{\link[=detectCores]{detectCores()}} to select all available cores.} \item{return}{an integer or string indicating the type of object to return. The default \code{1 - "same"} returns the same object type (i.e. class and other attributes are retained, just the names for the dimensions are adjusted). \code{2 - "matrix"} always returns the output as matrix and \code{3 - "data.frame"} always returns a data frame.} \item{drop}{logical. If the result has only one row or one column, \code{drop = TRUE} will drop dimensions and return a (named) atomic vector.} } \details{ \code{dapply} is an efficient command to apply functions to rows or columns of data without loosing information (attributes) about the data or changing the classes or format of the data. It is principally an efficient wrapper around \code{\link{lapply}} and works as follows: \itemize{ \item Save the attributes of \code{X}. \item If \code{MARGIN = 2} (columns), convert matrices to plain lists of columns using \code{\link{mctl}} and remove all attributes from data frames. \item If \code{MARGIN = 1} (rows), convert matrices to plain lists of rows using \code{\link{mrtl}}. For data frames remove all attributes, efficiently convert to matrix using \code{do.call(rbind, X)} and also convert to list of rows using \code{\link{mrtl}}. \item Call \code{\link{lapply}} or \code{\link{mclapply}} on these plain lists (which is faster than calling \code{lapply} on an object with attributes). \item depending on the requested output type, use \code{\link{matrix}}, \code{\link{unlist}} or \code{\link[=do.call]{do.call(cbind, ...)}} to convert the result back to a matrix or list of columns. \item modify the relevant attributes accordingly and efficiently attach to the object again (no further checks). % , non essential attributes are kept and added at the end of the attribute list } The performance gain from working with plain lists makes \code{dapply} not much slower than calling \code{lapply} itself on a data frame. Because of the conversions involved, row-operations require some memory, but are still faster than \code{\link{apply}}. } \value{ \code{X} where \code{FUN} was applied to every row or column. } % \references{ %% ~put references to the literature/web site here ~ % } % \author{ %% ~~who you are~~ % } % \note{ %% ~~further notes~~ % } %% ~Make other sections like Warning with \section{Warning }{\dots.} ~ \seealso{ \code{\link{BY}}, \code{\link{collap}}, \link[=fast-statistical-functions]{Fast Statistical Functions}, \link[=data-transformations]{Data Transformations}, \link[=collapse-documentation]{Collapse Overview} } \examples{ head(dapply(mtcars, log)) # Take natural log of each variable head(dapply(mtcars, log, return = "matrix")) # Return as matrix m <- as.matrix(mtcars) head(dapply(m, log)) # Same thing head(dapply(m, log, return = "data.frame")) # Return data frame from matrix dapply(mtcars, sum); dapply(m, sum) # Computing sum of each column, return as vector dapply(mtcars, sum, drop = FALSE) # This returns a data frame of 1 row dapply(mtcars, sum, MARGIN = 1) # Compute row-sum of each column, return as vector dapply(m, sum, MARGIN = 1) # Same thing for matrices, faster t. apply(m, 1, sum) head(dapply(m, sum, MARGIN = 1, drop = FALSE)) # Gives matrix with one column head(dapply(m, quantile, MARGIN = 1)) # Compute row-quantiles dapply(m, quantile) # Column-quantiles head(dapply(mtcars, quantile, MARGIN = 1)) # Same for data frames, output is also a data.frame dapply(mtcars, quantile) # With classed objects, we have to be a bit careful \dontrun{ dapply(EuStockMarkets, quantile) # This gives an error because the tsp attribute is misspecified } dapply(EuStockMarkets, quantile, return = "matrix") # These both work fine.. dapply(EuStockMarkets, quantile, return = "data.frame") \donttest{ % No code relying on suggested package # Similarly for grouped tibbles and other data frame based classes library(dplyr) gmtcars <- group_by(mtcars,cyl,vs,am) head(dapply(gmtcars, log)) # Still gives a grouped tibble back dapply(gmtcars, quantile, MARGIN = 1) # Here it makes sense to keep the groups attribute dapply(gmtcars, quantile) # This does not make much sense, ... dapply(gmtcars, quantile, # better convert to plain data.frame: return = "data.frame") } } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{manip} % __ONLY ONE__ keyword per line collapse/man/fhdbetween_fhdwithin.Rd0000644000176200001440000003356714176642305017323 0ustar liggesusers\name{fhdbetween-fhdwithin} \alias{fhdbetween} \alias{fhdbetween.default} \alias{fhdbetween.matrix} \alias{fhdbetween.data.frame} \alias{fhdbetween.pseries} \alias{fhdbetween.pdata.frame} \alias{fhdwithin} \alias{fhdwithin.default} \alias{fhdwithin.matrix} \alias{fhdwithin.data.frame} \alias{fhdwithin.pseries} \alias{fhdwithin.pdata.frame} \alias{HDW} \alias{HDW.default} \alias{HDW.matrix} \alias{HDW.data.frame} \alias{HDW.pseries} \alias{HDW.pdata.frame} \alias{HDB} \alias{HDB.default} \alias{HDB.matrix} \alias{HDB.data.frame} \alias{HDB.pseries} \alias{HDB.pdata.frame} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Higher-Dimensional Centering and Linear Prediction } \description{ \code{fhdbetween} is a generalization of \code{fbetween} to efficiently predict with multiple factors and linear models (i.e. predict with vectors/factors, matrices, or data frames/lists where the latter may contain multiple factor variables). Similarly, \code{fhdwithin} is a generalization of \code{fwithin} to center on multiple factors and partial-out linear models. The corresponding operators \code{HDB} and \code{HDW} additionally allow to predict / partial out full \code{lm()} formulas with interactions between variables. } \usage{ fhdbetween(x, \dots) fhdwithin(x, \dots) HDB(x, \dots) HDW(x, \dots) \method{fhdbetween}{default}(x, fl, w = NULL, na.rm = TRUE, fill = FALSE, lm.method = "qr", \dots) \method{fhdwithin}{default}(x, fl, w = NULL, na.rm = TRUE, fill = FALSE, lm.method = "qr", \dots) \method{HDB}{default}(x, fl, w = NULL, na.rm = TRUE, fill = FALSE, lm.method = "qr", \dots) \method{HDW}{default}(x, fl, w = NULL, na.rm = TRUE, fill = FALSE, lm.method = "qr", \dots) \method{fhdbetween}{matrix}(x, fl, w = NULL, na.rm = TRUE, fill = FALSE, lm.method = "qr", \dots) \method{fhdwithin}{matrix}(x, fl, w = NULL, na.rm = TRUE, fill = FALSE, lm.method = "qr", \dots) \method{HDB}{matrix}(x, fl, w = NULL, na.rm = TRUE, fill = FALSE, stub = "HDB.", lm.method = "qr", \dots) \method{HDW}{matrix}(x, fl, w = NULL, na.rm = TRUE, fill = FALSE, stub = "HDW.", lm.method = "qr", \dots) \method{fhdbetween}{data.frame}(x, fl, w = NULL, na.rm = TRUE, fill = FALSE, variable.wise = FALSE, lm.method = "qr", \dots) \method{fhdwithin}{data.frame}(x, fl, w = NULL, na.rm = TRUE, fill = FALSE, variable.wise = FALSE, lm.method = "qr", \dots) \method{HDB}{data.frame}(x, fl, w = NULL, cols = is.numeric, na.rm = TRUE, fill = FALSE, variable.wise = FALSE, stub = "HDB.", lm.method = "qr", \dots) \method{HDW}{data.frame}(x, fl, w = NULL, cols = is.numeric, na.rm = TRUE, fill = FALSE, variable.wise = FALSE, stub = "HDW.", lm.method = "qr", \dots) # Methods for compatibility with plm: \method{fhdbetween}{pseries}(x, effect = "all", w = NULL, na.rm = TRUE, fill = TRUE, \dots) \method{fhdwithin}{pseries}(x, effect = "all", w = NULL, na.rm = TRUE, fill = TRUE, \dots) \method{HDB}{pseries}(x, effect = "all", w = NULL, na.rm = TRUE, fill = TRUE, \dots) \method{HDW}{pseries}(x, effect = "all", w = NULL, na.rm = TRUE, fill = TRUE, \dots) \method{fhdbetween}{pdata.frame}(x, effect = "all", w = NULL, na.rm = TRUE, fill = TRUE, variable.wise = TRUE, \dots) \method{fhdwithin}{pdata.frame}(x, effect = "all", w = NULL, na.rm = TRUE, fill = TRUE, variable.wise = TRUE, \dots) \method{HDB}{pdata.frame}(x, effect = "all", w = NULL, cols = is.numeric, na.rm = TRUE, fill = TRUE, variable.wise = TRUE, stub = "HDB.", \dots) \method{HDW}{pdata.frame}(x, effect = "all", w = NULL, cols = is.numeric, na.rm = TRUE, fill = TRUE, variable.wise = TRUE, stub = "HDW.", \dots) } \arguments{ \item{x}{a numeric vector, matrix, data frame, panel series (\code{plm::pseries}) or panel data frame (\code{plm::pdata.frame}).} \item{fl}{a numeric vector, factor, matrix, data frame or list (which may or may not contain factors). In the data frame method \code{fl} can also be a one-or two sided \code{lm()} formula with variables contained in \code{x}. Interactions \code{(:)} and full interactions \code{(*)} are supported. See Examples and the Note.} \item{w}{a vector of (non-negative) weights.} \item{cols}{\emph{data.frame methods}: Select columns to center (partial-out) or predict using column names, indices, a logical vector or a function. Unless specified otherwise all numeric columns are selected. If \code{NULL}, all variables are selected.} \item{na.rm}{remove missing values from both \code{x} and \code{fl}. by default rows with missing values in \code{x} or \code{fl} are removed. In that case an attribute "na.rm" is attached containing the rows removed.} \item{fill}{If \code{na.rm = TRUE}, \code{fill = TRUE} will not remove rows with missing values in \code{x} or \code{fl}, but fill them with \code{NA}'s.} \item{variable.wise}{\emph{(p)data.frame methods}: Setting \code{variable.wise = TRUE} will process each column individually i.e. use all non-missing cases in each column and in \code{fl} (\code{fl} is only checked for missing values if \code{na.rm = TRUE}). This is a lot less efficient but uses all data available in each column. } \item{effect}{\emph{plm} methods: Select which panel identifiers should be used for centering. 1L takes the first variable in the \code{plm::index}, 2L the second etc.. Index variables can also be called by name using a character vector. The keyword \code{"all"} uses all identifiers. } \item{stub}{a prefix or stub to rename all transformed columns. \code{FALSE} will not rename columns.} \item{lm.method}{character. The linear fitting method. Supported are \code{"chol"} and \code{"qr"}. See \code{\link{flm}}.} \item{\dots}{further arguments passed to \code{fixest::demean} (other than \code{notes} and \code{im_confident}) and \code{\link{chol}} / \code{\link{qr}}. Possible choices are \code{tol} to set a uniform numerical tolerance for the entire fitting process, or \code{nthreads} and \code{iter} to govern the higher-order centering process.} } \details{ \code{fhdbetween/HDB} and \code{fhdwithin/HDW} are powerful functions for high-dimensional linear prediction problems involving large factors and datasets, but can just as well handle ordinary regression problems. They are implemented as efficient wrappers around \code{\link[=fwithin]{fbetween / fwithin}}, \code{\link{flm}} and some C++ code from the \code{fixest} package that is imported for higher-order centering tasks (thus \code{fixest} needs to be installed for problems involving more than one factor). Intended areas of use are to efficiently obtain residuals and predicted values from data, and to prepare data for complex linear models involving multiple levels of fixed effects. Such models can now be fitted using \code{lm()} on data prepared with \code{fhdwithin / HDW} (relying on bootstrapped SE's for inference, or implementing the appropriate corrections). See Examples. If \code{fl} is a vector or matrix, the result are identical to \code{lm} i.e. \code{fhdbetween / HDB} returns \code{fitted(lm(x ~ fl))} and \code{fhdwithin / HDW} \code{residuals(lm(x ~ fl))}. If \code{fl} is a list containing factors, all variables in \code{x} and non-factor variables in \code{fl} are centered on these factors using either \code{\link[=fwithin]{fbetween / fwithin}} for a single factor or \code{fixest} C++ code for multiple factors. Afterwards the centered data is regressed on the centered predictors. If \code{fl} is just a list of factors, \code{fhdwithin/HDW} returns the centered data and \code{fhdbetween/HDB} the corresponding means. Take as a most general example a list \code{fl = list(fct1, fct2, ..., var1, var2, ...)} where \code{fcti} are factors and \code{vari} are continuous variables. The output of \code{fhdwithin/HDW | fhdbetween/HDB} will then be identical to calling \code{resid | fitted} on \code{lm(x ~ fct1 + fct2 + ... + var1 + var2 + ...)}. The computations performed by \code{fhdwithin/HDW} and \code{fhdbetween/HDB} are however much faster and more memory efficient than \code{lm} because factors are not passed to \code{\link{model.matrix}} and expanded to matrices of dummies but projected beforehand. The formula interface to the data.frame method (only supported by the operators \code{HDW | HDB}) provides ease of use and allows for additional modeling complexity. For example it is possible to project out formulas like \code{HDW(data, ~ fct1*var1 + fct2:fct3 + var2:fct2:fct3 + var2:var3 + poly(var5,3)*fct5)} containing simple \code{(:)} or full \code{(*)} interactions of factors with continuous variables or polynomials of continuous variables, and two-or three-way interactions of factors and continuous variables. If the formula is one-sided as in the example above (the space left of \code{(~)} is left empty), the formula is applied to all variables selected through \code{cols}. The specification provided in \code{cols} (default: all numeric variables not used in the formula) can be overridden by supplying one-or more dependent variables. For example \code{HDW(data, var1 + var2 ~ fct1 + fct2)} will return a data.frame with \code{var1} and \code{var2} centered on \code{fct1} and \code{fct2}. The special methods for \code{plm::pseries} and \code{plm::pdata.frame} center a panel series or variables in a panel data frame on all panel-identifiers. By default in these methods \code{fill = TRUE} and \code{variable.wise = TRUE}, so missing values are kept. This change in the default arguments was done to ensure a coherent framework of functions and operators applied to \emph{plm} panel data classes. } \note{ % \subsection{Caution with full (*) and factor-continuous variable interactions:}{ % In general full interactions specified with \code{(*)} can be very slow on large data, and \code{lfe::demeanlist} is also not very speedy on interaction between factors and continuous variables, so these structures should be used with caution (don't just specify an interaction like that on a large dataset, start with smaller data and see how long computations take. Upon further updates of \code{lfe::demeanlist}, performance might improve). % } \subsection{On the differences between \code{fhdwithin/HDW}\dots and \code{fwithin/W}\dots:}{ \itemize{ \item \code{fhdwithin/HDW} can center data on multiple factors and also partial out continuous variables and factor-continuous interactions while \code{fwithin/W} only centers on one factor or the interaction of a set of factors, and does that very efficiently. \item \code{HDW(data, ~ qF(group1) + qF(group2))} simultaneously centers numeric variables in data on \code{group1} and \code{group2}, while \code{W(data, ~ group1 + group2)} centers data on the interaction of \code{group1} and \code{group2}. The equivalent operation in \code{HDW} would be: \code{HDW(data, ~ qF(group1):qF(group2))}. \item \code{W} always does computations on the variable-wise complete observations (in both matrices and data frames), whereas by default \code{HDW} removes all cases missing in either \code{x} or \code{fl}. In short, \code{W(data, ~ group1 + group2)} is actually equivalent to \code{HDW(data, ~ qF(group1):qF(group2), variable.wise = TRUE)}. \code{HDW(data, ~ qF(group1):qF(group2))} would remove any missing cases. \item \code{fbetween/B} and \code{fwithin/W} have options to fill missing cases using group-averages and to add the overall mean back to group-demeaned data. These options are not available in \code{fhdbetween/HDB} and \code{fhdwithin/HDW}. Since \code{HDB} and \code{HDW} by default remove missing cases, they also don't have options to keep grouping-columns as in \code{B} and \code{W}. } } } \value{ \code{HDB} returns fitted values of regressing \code{x} on \code{fl}. \code{HDW} returns residuals. See Details and Examples. } % \references{ %% ~put references to the literature/web site here ~ % } % \author{ %% ~~who you are~~ % } % \note{ %% ~~further notes~~ % } %% ~Make other sections like Warning with \section{Warning }{\dots.} ~ \seealso{ \code{\link[=fbetween]{fbetween, fwithin}}, \code{\link{fscale}}, \code{\link{TRA}}, \code{\link{flm}}, \code{\link{fFtest}}, \link[=data-transformations]{Data Transformations}, \link[=collapse-documentation]{Collapse Overview} } \examples{ HDW(mtcars$mpg, mtcars$carb) # Simple regression problems HDW(mtcars$mpg, mtcars[-1]) HDW(mtcars$mpg, qM(mtcars[-1])) head(HDW(qM(mtcars[3:4]), mtcars[1:2])) head(HDW(iris[1:2], iris[3:4])) # Partialling columns 3 and 4 out of colums 1 and 2 head(HDW(iris[1:2], iris[3:5])) # Adding the Species factor -> fixed effect \donttest{ % We don't test because this code depends on suggested package. head(HDW(wlddev, PCGDP + LIFEEX ~ iso3c + qF(year))) # Partialling out 2 fixed effects head(HDW(wlddev, PCGDP + LIFEEX ~ iso3c + qF(year), variable.wise = TRUE)) # Variable-wise head(HDW(wlddev, PCGDP + LIFEEX ~ iso3c + qF(year) + ODA)) # Adding ODA as a continuous regressor head(HDW(wlddev, PCGDP + LIFEEX ~ iso3c:qF(decade) + qF(year) + ODA)) # Country-decade and year FE's head(HDW(wlddev, PCGDP + LIFEEX ~ iso3c*year)) # Country specific time trends head(HDW(wlddev, PCGDP + LIFEEX ~ iso3c*poly(year, 3))) # Country specific cubic trends # More complex examples lm(HDW.mpg ~ HDW.hp, data = HDW(mtcars, ~ factor(cyl)*carb + vs + wt:gear + wt:gear:carb)) lm(mpg ~ hp + factor(cyl)*carb + vs + wt:gear + wt:gear:carb, data = mtcars) lm(HDW.mpg ~ HDW.hp, data = HDW(mtcars, ~ factor(cyl)*carb + vs + wt:gear)) lm(mpg ~ hp + factor(cyl)*carb + vs + wt:gear, data = mtcars) lm(HDW.mpg ~ HDW.hp, data = HDW(mtcars, ~ cyl*carb + vs + wt:gear)) lm(mpg ~ hp + cyl*carb + vs + wt:gear, data = mtcars) lm(HDW.mpg ~ HDW.hp, data = HDW(mtcars, mpg + hp ~ cyl*carb + factor(cyl)*poly(drat,2))) lm(mpg ~ hp + cyl*carb + factor(cyl)*poly(drat,2), data = mtcars) } } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{manip} % __ONLY ONE__ keyword per line \keyword{multivariate} collapse/man/fdiff.Rd0000644000176200001440000003540614175334642014215 0ustar liggesusers\name{fdiff} \alias{fdiff} \alias{fdiff.default} \alias{fdiff.matrix} \alias{fdiff.data.frame} \alias{fdiff.list} \alias{fdiff.pseries} \alias{fdiff.pdata.frame} \alias{fdiff.grouped_df} \alias{D} \alias{D.default} \alias{D.matrix} \alias{D.data.frame} \alias{D.list} \alias{D.pseries} \alias{D.pdata.frame} \alias{D.grouped_df} \alias{Dlog} \alias{Dlog.default} \alias{Dlog.matrix} \alias{Dlog.data.frame} \alias{Dlog.list} \alias{Dlog.pseries} \alias{Dlog.pdata.frame} \alias{Dlog.grouped_df} %- Also NEED an '\alias' for EACH other topic documented here. \title{ % Lagged and Iterated Fast (Quasi-, Log-) Differences for Time Series and Panel Data } \description{ \code{fdiff} is a S3 generic to compute (sequences of) suitably lagged / leaded and iterated differences, quasi-differences, log-differences or quasi-log-differences. The difference and log-difference operators \code{D} and \code{Dlog} also exists as parsimonious wrappers around \code{fdiff}, providing more flexibility than \code{fdiff} when applied to data frames. } \usage{ fdiff(x, n = 1, diff = 1, \dots) D(x, n = 1, diff = 1, \dots) Dlog(x, n = 1, diff = 1, \dots) \method{fdiff}{default}(x, n = 1, diff = 1, g = NULL, t = NULL, fill = NA, log = FALSE, rho = 1, stubs = TRUE, \dots) \method{D}{default}(x, n = 1, diff = 1, g = NULL, t = NULL, fill = NA, rho = 1, stubs = TRUE, \dots) \method{Dlog}{default}(x, n = 1, diff = 1, g = NULL, t = NULL, fill = NA, rho = 1, stubs = TRUE, \dots) \method{fdiff}{matrix}(x, n = 1, diff = 1, g = NULL, t = NULL, fill = NA, log = FALSE, rho = 1, stubs = length(n) + length(diff) > 2L, \dots) \method{D}{matrix}(x, n = 1, diff = 1, g = NULL, t = NULL, fill = NA, rho = 1, stubs = TRUE, \dots) \method{Dlog}{matrix}(x, n = 1, diff = 1, g = NULL, t = NULL, fill = NA, rho = 1, stubs = TRUE, \dots) \method{fdiff}{data.frame}(x, n = 1, diff = 1, g = NULL, t = NULL, fill = NA, log = FALSE, rho = 1, stubs = length(n) + length(diff) > 2L, \dots) \method{D}{data.frame}(x, n = 1, diff = 1, by = NULL, t = NULL, cols = is.numeric, fill = NA, rho = 1, stubs = TRUE, keep.ids = TRUE, \dots) \method{Dlog}{data.frame}(x, n = 1, diff = 1, by = NULL, t = NULL, cols = is.numeric, fill = NA, rho = 1, stubs = TRUE, keep.ids = TRUE, \dots) # Methods for compatibility with plm: \method{fdiff}{pseries}(x, n = 1, diff = 1, fill = NA, log = FALSE, rho = 1, stubs = TRUE, \dots) \method{D}{pseries}(x, n = 1, diff = 1, fill = NA, rho = 1, stubs = TRUE, \dots) \method{Dlog}{pseries}(x, n = 1, diff = 1, fill = NA, rho = 1, stubs = TRUE, \dots) \method{fdiff}{pdata.frame}(x, n = 1, diff = 1, fill = NA, log = FALSE, rho = 1, stubs = length(n) + length(diff) > 2L, \dots) \method{D}{pdata.frame}(x, n = 1, diff = 1, cols = is.numeric, fill = NA, rho = 1, stubs = TRUE, keep.ids = TRUE, \dots) \method{Dlog}{pdata.frame}(x, n = 1, diff = 1, cols = is.numeric, fill = NA, rho = 1, stubs = TRUE, keep.ids = TRUE, \dots) # Methods for grouped data frame / compatibility with dplyr: \method{fdiff}{grouped_df}(x, n = 1, diff = 1, t = NULL, fill = NA, log = FALSE, rho = 1, stubs = length(n) + length(diff) > 2L, keep.ids = TRUE, \dots) \method{D}{grouped_df}(x, n = 1, diff = 1, t = NULL, fill = NA, rho = 1, stubs = TRUE, keep.ids = TRUE, \dots) \method{Dlog}{grouped_df}(x, n = 1, diff = 1, t = NULL, fill = NA, rho = 1, stubs = TRUE, keep.ids = TRUE, \dots) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{x}{a numeric vector / time series, (time series) matrix, data frame, panel series (\code{plm::pseries}), panel data frame (\code{plm::pdata.frame}) or grouped data frame (class 'grouped_df').} \item{n}{integer. A vector indicating the number of lags or leads.} \item{diff}{integer. A vector of integers > 1 indicating the order of differencing / log-differencing.} \item{g}{a factor, \code{\link{GRP}} object, atomic vector (internally converted to factor) or a list of vectors / factors (internally converted to a \code{\link{GRP}} object) used to group \code{x}.} \item{by}{\emph{data.frame method}: Same as \code{g}, but also allows one- or two-sided formulas i.e. \code{~ group1} or \code{var1 + var2 ~ group1 + group2}. See Examples.} \item{t}{same input as \code{g/by}, to indicate the time-variable(s). For safe computation of differences on unordered time series and panels. Data Frame method also allows one-sided formula i.e. \code{~time}. grouped_df method supports lazy-evaluation i.e. \code{time} (no quotes).} \item{cols}{\emph{data.frame method}: Select columns to difference using a function, column names, indices or a logical vector. Default: All numeric variables. \emph{Note}: \code{cols} is ignored if a two-sided formula is passed to \code{by}.} \item{fill}{value to insert when vectors are shifted. Default is \code{NA}. } \item{log}{logical. \code{TRUE} computes log-differences instead. See Details.} \item{rho}{double. Autocorrelation parameter. Set to a value between 0 and 1 for quasi-differencing. Any numeric value can be supplied. } \item{stubs}{logical. \code{TRUE} will rename all differenced columns by adding prefixes "L\code{n}D\code{diff}." / "F\code{n}D\code{diff}." for differences "L\code{n}Dlog\code{diff}." / "F\code{n}Dlog\code{diff}." for log-differences and replacing "D" / "Dlog" with "QD" / "QDlog" for quasi-differences. } \item{keep.ids}{\emph{data.frame / pdata.frame / grouped_df methods}: Logical. Drop all panel-identifiers from the output (which includes all variables passed to \code{by} or \code{t}). \emph{Note}: For grouped / panel data frames identifiers are dropped, but the 'groups' / 'index' attributes are kept.} \item{\dots}{arguments to be passed to or from other methods.} } \details{ By default, \code{fdiff/D/Dlog} return \code{x} with all columns differenced / log-differenced. Differences are computed as \code{repeat(diff) x[i] - rho*x[i-n]}, and log-differences as \code{repeat(diff) log(x[i]) - rho*log(x[i-n])}. If \code{rho < 1}, this becomes quasi- (or partial) differencing, which is a technique suggested by Cochrane and Orcutt (1949) to deal with serial correlation in regression models, where \code{rho} is typically estimated by running a regression of the model residuals on the lagged residuals. Setting \code{diff = 2} returns differences of differences etc\dots and setting \code{n = 2} returns simple differences computed by subtracting twice-lagged \code{x} from \code{x}. It is also possible to compute forward differences by passing negative \code{n} values. \code{n} also supports arbitrary vectors of integers (lags), and \code{diff} supports positive sequences of integers (differences): If more than one value is passed to \code{n} and/or \code{diff}, the data is expanded-wide as follows: If \code{x} is an atomic vector or time series, a (time series) matrix is returned with columns ordered first by lag, then by difference. If \code{x} is a matrix or data frame, each column is expanded in like manor such that the output has \code{ncol(x)*length(n)*length(diff)} columns ordered first by column name, then by lag, then by difference. With groups/panel-identifiers supplied to \code{g/by}, \code{fdiff/D/Dlog} efficiently compute panel-differences. If \code{t} is left empty, the data needs to be ordered such that all values belonging to a group are consecutive and in the right order. It is not necessary that the groups themselves occur in the right order. If time-variable(s) are supplied to \code{t}, the panel is fully identified and differences can be securely computed even if the data is unordered. \code{fdiff/D/Dlog} supports balanced panels and unbalanced panels where various individuals are observed for different time-sequences. % (both start, end and duration of observation can differ for each individual), but does not natively support irregularly spaced time series and panels. For computational details and efficiency considerations see the help page for \code{\link{flag}}. %A work-around for differencing irregular panels is easily achieved with the help of \code{\link{seqid}}. It is also possible to compute differences on unordered vectors or irregular time series (thus utilizing \code{t} but leaving \code{g/by} empty). The methods applying to \emph{plm} objects (panel series and panel data frames) automatically utilize the panel-identifiers attached to these objects and thus securely compute fully identified panel-differences. If these objects have > 2 panel-identifiers attached to them, the last identifier is assumed to be the time-variable, and the others are taken as grouping-variables and interacted. } \value{ \code{x} differenced \code{diff} times using lags \code{n} of itself. Quasi and log-differences are toggled by the \code{rho} and \code{log} arguments or the \code{Dlog} operator. Computations can be grouped by \code{g/by} and/or ordered by \code{t}. See Details and Examples. } \references{ Cochrane, D.; Orcutt, G. H. (1949). Application of Least Squares Regression to Relationships Containing Auto-Correlated Error Terms. \emph{Journal of the American Statistical Association}. 44 (245): 32-61. Prais, S. J. & Winsten, C. B. (1954). Trend Estimators and Serial Correlation. \emph{Cowles Commission Discussion Paper No. 383.} Chicago. } % \author{ %% ~~who you are~~ % } % \note{ %% ~~further notes~~ % } %% ~Make other sections like Warning with \section{Warning }{\dots.} ~ \seealso{ \code{\link[=flag]{flag/L/F}}, \code{\link[=fgrowth]{fgrowth/G}}, \link[=time-series-panel-series]{Time Series and Panel Series}, \link[=collapse-documentation]{Collapse Overview} } \examples{ ## Simple Time Series: AirPassengers D(AirPassengers) # 1st difference, same as fdiff(AirPassengers) D(AirPassengers, -1) # Forward difference Dlog(AirPassengers) # Log-difference D(AirPassengers, 1, 2) # Second difference Dlog(AirPassengers, 1, 2) # Second log-difference D(AirPassengers, 12) # Seasonal difference (data is monthly) D(AirPassengers, # Quasi-difference, see a better example below rho = pwcor(AirPassengers, L(AirPassengers))) head(D(AirPassengers, -2:2, 1:3)) # Sequence of leaded/lagged and iterated differences # let's do some visual analysis plot(AirPassengers) # Plot the series - seasonal pattern is evident plot(stl(AirPassengers, "periodic")) # Seasonal decomposition plot(D(AirPassengers,c(1,12),1:2)) # Plotting ordinary and seasonal first and second differences plot(stl(window(D(AirPassengers,12), # Taking seasonal differences removes most seasonal variation 1950), "periodic")) ## Time Series Matrix of 4 EU Stock Market Indicators, recorded 260 days per year plot(D(EuStockMarkets, c(0, 260))) # Plot series and annual differnces mod <- lm(DAX ~., L(EuStockMarkets, c(0, 260))) # Regressing the DAX on its annual lag summary(mod) # and the levels and annual lags others r <- residuals(mod) # Obtain residuals pwcor(r, L(r)) # Residual Autocorrelation fFtest(r, L(r)) # F-test of residual autocorrelation # (better use lmtest::bgtest) modCO <- lm(QD1.DAX ~., D(L(EuStockMarkets, c(0, 260)), # Cochrane-Orcutt (1949) estimation rho = pwcor(r, L(r)))) summary(modCO) rCO <- residuals(modCO) fFtest(rCO, L(rCO)) # No more autocorrelation ## World Development Panel Data head(fdiff(num_vars(wlddev), 1, 1, # Computes differences of numeric variables wlddev$country, wlddev$year)) # fdiff requires external inputs.. head(D(wlddev, 1, 1, ~country, ~year)) # Differences of numeric variables head(D(wlddev, 1, 1, ~country)) # Without t: Works because data is ordered head(D(wlddev, 1, 1, PCGDP + LIFEEX ~ country, ~year)) # Difference of GDP & Life Expectancy head(D(wlddev, 0:1, 1, ~ country, ~year, cols = 9:10)) # Same, also retaining original series head(D(wlddev, 0:1, 1, ~ country, ~year, 9:10, # Dropping id columns keep.ids = FALSE)) # Dynamic Panel Data Models: summary(lm(D(PCGDP,1,1,iso3c,year) ~ # Diff. GDP regressed on it's lagged level L(PCGDP,1,iso3c,year) + # and the difference of Life Expanctancy D(LIFEEX,1,1,iso3c,year), data = wlddev)) g = qF(wlddev$country) # Omitting t and precomputing g allows for summary(lm(D(PCGDP,1,1,g) ~ L(PCGDP,1,g) + # a bit more parsimonious specification D(LIFEEX,1,1,g), wlddev)) summary(lm(D1.PCGDP ~., # Now adding level and lagged level of L(D(wlddev,0:1,1, ~ country, ~year,9:10),0:1, # LIFEEX and lagged differences rates ~ country, ~year, keep.ids = FALSE)[-1])) \donttest{ % No code relying on suggested package ## Using plm can make things easier, but avoid attaching or 'with' calls: pwlddev <- plm::pdata.frame(wlddev, index = c("country","year")) head(D(pwlddev, 0:1, 1, 9:10)) # Again differences of LIFEEX and PCGDP PCGDP <- pwlddev$PCGDP # A panel-Series of GDP per Capita head(D(PCGDP)) # Differencing the panel series summary(lm(D1.PCGDP ~., # Running the dynamic model again -> data = L(D(pwlddev,0:1,1,9:10),0:1, # code becomes a bit simpler keep.ids = FALSE)[-1])) # One could be tempted to also do something like this, but THIS DOES NOT WORK!!: # -> a pseries is only created when subsetting the pdata.frame using $ or [[ summary(lm(D(PCGDP) ~ L(D(PCGDP,0:1)) + L(D(LIFEEX,0:1),0:1), pwlddev)) # To make it work, one needs to create pseries LIFEEX <- pwlddev$LIFEEX summary(lm(D(PCGDP) ~ L(D(PCGDP,0:1)) + L(D(LIFEEX,0:1),0:1))) # THIS WORKS ! ## Using dplyr: library(dplyr) wlddev \%>\% group_by(country) \%>\% select(PCGDP,LIFEEX) \%>\% fdiff(0:1,1:2) # Adding a first and second difference wlddev \%>\% group_by(country) \%>\% select(year,PCGDP,LIFEEX) \%>\% D(0:1,1:2,year) # Also using t (safer) wlddev \%>\% group_by(country) \%>\% # Dropping id's select(year,PCGDP,LIFEEX) \%>\% D(0:1,1:2,year, keep.ids = FALSE) } } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{manip} % __ONLY ONE__ keyword per line % use one of RShowDoc("KEYWORDS") \keyword{ts} collapse/man/roworder.Rd0000644000176200001440000001001114167156135014763 0ustar liggesusers\name{roworder} \alias{roworder} \alias{roworderv} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Fast Reordering of Data Frame Rows } \description{ A fast substitute for \code{dplyr::arrange}. It returns a sorted copy of the data frame, unless the data is already sorted in which case no copy is made. In addition, rows can be manually re-ordered. Use \code{data.table::setorder} to sort a data frame without creating a copy. %\code{roworder} also does not support grouped tibbles or pdata.frame's, i.e. every data frame is treated the same. } \usage{ roworder(X, \dots, na.last = TRUE) roworderv(X, cols = NULL, neworder = NULL, decreasing = FALSE, na.last = TRUE, pos = "front") } %- maybe also 'usage' for other objects documented here. \arguments{ \item{X}{a data frame or list of equal-length columns. } \item{\dots}{comma-separated columns of \code{X} to sort by e.g. \code{var1, var2}. Negatives i.e. \code{-var1, var2} can be used to sort in decreasing order of \code{var1}.} \item{cols}{select columns to sort by using a function, column names, indices or a logical vector. The default \code{NULL} sorts by all columns in order of occurrence (from left to right). } \item{na.last}{logical. If \code{TRUE}, missing values in the sorting columns are placed last; if \code{FALSE}, they are placed first; if \code{NA} they are removed (argument passed to \code{\link{radixorderv}}).} \item{decreasing}{logical. Should the sort order be increasing or decreasing? Can also be a vector of length equal to the number of arguments in \code{cols} (argument passed to \code{\link{radixorderv}}).} \item{neworder}{an ordering vector, can be \code{< nrow(X)}. if \code{pos = "front"} or \code{pos = "end"}, a logical vector can also be supplied. This argument overwrites \code{cols}.} \item{pos}{integer or character. Different arrangement options if \code{!is.null(neworder) && length(neworder) < nrow(X)}. \tabular{lllll}{\emph{ Int. } \tab\tab \emph{ String } \tab\tab \emph{ Description } \cr 1 \tab\tab "front" \tab\tab move rows in \code{neworder} to the front (top) of \code{X} (the default). \cr 2 \tab\tab "end" \tab\tab move rows in \code{neworder} to the end (bottom) of \code{X}. \cr 3 \tab\tab "exchange" \tab\tab just exchange the order of rows in \code{neworder}, other rows remain in the same position. \cr 4 \tab\tab "after" \tab\tab place all further selected rows behind the first selected row. \cr } } } \value{ A copy of \code{X} with rows reordered. If \code{X} is already sorted, \code{X} is simply returned. } \note{ If you don't require a copy of the data, use \code{data.table::setorder} (you can also use it in a piped call as it invisibly returns the data). } %% ~Make other sections like Warning with \section{Warning }{\dots.} ~ \seealso{ \code{\link{colorder}}, \code{\link{fsubset}}, \link[=fast-grouping-ordering]{Fast Grouping and Ordering}, \link[=collapse-documentation]{Collapse Overview}. } \examples{ head(roworder(airquality, Month, -Ozone)) head(roworder(airquality, Month, -Ozone, na.last = NA)) # Removes the missing values in Ozone ## Same in standard evaluation head(roworderv(airquality, c("Month", "Ozone"), decreasing = c(FALSE, TRUE))) head(roworderv(airquality, c("Month", "Ozone"), decreasing = c(FALSE, TRUE), na.last = NA)) ## Custom reordering head(roworderv(mtcars, neworder = 3:4)) # Bring rows 3 and 4 to the front head(roworderv(mtcars, neworder = 3:4, pos = "end")) # Bring them to the end head(roworderv(mtcars, neworder = mtcars$vs == 1)) # Bring rows with vs == 1 to the top } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory (show via RShowDoc("KEYWORDS")): \keyword{ manip } % \keyword{ ~kwd2 } % Use only one keyword per line. % For non-standard keywords, use \concept instead of \keyword: % \concept{ ~cpt1 } % \concept{ ~cpt2 } % Use only one concept per line. collapse/man/t_list.Rd0000644000176200001440000000263414170153037014422 0ustar liggesusers\name{t_list} \alias{t_list} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Efficient List Transpose } \description{ \code{t_list} turns a list of lists inside-out. The performance is quite efficient regardless of the size of the list. } \usage{ t_list(l) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{l}{a list of lists. Elements inside the sublists can be heterogeneous, including further lists. } } \value{ \code{l} transposed such that the second layer of the list becomes the top layer and the top layer the second layer. See Examples. } %% ~Make other sections like Warning with \section{Warning }{....} ~ \seealso{ \code{\link{rsplit}}, \link[=list-processing]{List Processing}, \link[=collapse-documentation]{Collapse Overview} } \examples{ # Homogenous list of lists l <- list(a = list(c = 1, d = 2), b = list(c = 3, d = 4)) str(l) str(t_list(l)) # Heterogenous case l2 <- list(a = list(c = 1, d = letters), b = list(c = 3:10, d = list(4, e = 5))) attr(l2, "bla") <- "abc" # Attributes other than names are preserved str(l2) str(t_list(l2)) rm(l, l2) } \keyword{list} \keyword{manip} \keyword{utilities} % \keyword{ ~kwd1 } % \keyword{ ~kwd2 } % Use only one keyword per line. % For non-standard keywords, use \concept instead of \keyword: % \concept{ ~cpt1 } % \concept{ ~cpt2 } % Use only one concept per line. collapse/man/GGDC10S.Rd0000644000176200001440000001073313714110437014113 0ustar liggesusers\name{GGDC10S} \alias{GGDC10S} \docType{data} \title{ Groningen Growth and Development Centre 10-Sector Database } \description{ The GGDC 10-Sector Database provides a long-run internationally comparable dataset on sectoral productivity performance in Africa, Asia, and Latin America. Variables covered in the data set are annual series of value added (in local currency), and persons employed for 10 broad sectors. } \usage{data("GGDC10S")} \format{ A data frame with 5027 observations on the following 16 variables. \describe{ \item{\code{Country}}{\emph{char}: Country (43 countries)} \item{\code{Regioncode}}{\emph{char}: ISO3 Region code} \item{\code{Region}}{\emph{char}: Region (6 World Regions)} \item{\code{Variable}}{\emph{char}: Variable (Value Added or Employment)} \item{\code{Year}}{\emph{num}: Year (67 Years, 1947-2013)} \item{\code{AGR}}{\emph{num}: Agriculture} \item{\code{MIN}}{\emph{num}: Mining} \item{\code{MAN}}{\emph{num}: Manufacturing} \item{\code{PU}}{\emph{num}: Utilities} \item{\code{CON}}{\emph{num}: Construction} \item{\code{WRT}}{\emph{num}: Trade, restaurants and hotels} \item{\code{TRA}}{\emph{num}: Transport, storage and communication} \item{\code{FIRE}}{\emph{num}: Finance, insurance, real estate and business services} \item{\code{GOV}}{\emph{num}: Government services} \item{\code{OTH}}{\emph{num}: Community, social and personal services} \item{\code{SUM}}{\emph{num}: Summation of sector GDP} } } % \details{ %% ~~ If necessary, more details than the __description__ above ~~ % } \source{ \url{https://www.rug.nl/ggdc/productivity/10-sector/} } \references{ Timmer, M. P., de Vries, G. J., & de Vries, K. (2015). "Patterns of Structural Change in Developing Countries." . In J. Weiss, & M. Tribe (Eds.), \emph{Routledge Handbook of Industry and Development.} (pp. 65-83). Routledge. } \seealso{ \code{\link{wlddev}}, \link[=collapse-documentation]{Collapse Overview} } \examples{ namlab(GGDC10S, class = TRUE) # aperm(qsu(GGDC10S, ~ Variable, ~ Variable + Country, vlabels = TRUE)) \donttest{ library(data.table) library(ggplot2) ## World Regions Structural Change Plot dat <- GGDC10S fselect(dat, AGR:OTH) <- replace_outliers(dapply(fselect(dat, AGR:OTH), `*`, 1 / dat$SUM), 0, NA, "min") dat$Variable <- recode_char(dat$Variable, VA = "Value Added Share", EMP = "Employment Share") dat <- collap(dat, ~ Variable + Region + Year, cols = 6:15) dat <- melt(qDT(dat), 1:3, variable.name = "Sector", na.rm = TRUE) ggplot(aes(x = Year, y = value, fill = Sector), data = dat) + geom_area(position = "fill", alpha = 0.9) + labs(x = NULL, y = NULL) + theme_linedraw(base_size = 14) + facet_grid(Variable ~ Region, scales = "free_x") + scale_fill_manual(values = sub("#00FF66FF", "#00CC66", rainbow(10))) + scale_x_continuous(breaks = scales::pretty_breaks(n = 7), expand = c(0, 0))+ scale_y_continuous(breaks = scales::pretty_breaks(n = 10), expand = c(0, 0), labels = scales::percent) + theme(axis.text.x = element_text(angle = 315, hjust = 0, margin = ggplot2::margin(t = 0)), strip.background = element_rect(colour = "grey30", fill = "grey30")) # A function to plot the structural change of an arbitrary country plotGGDC <- function(ctry) { dat <- fsubset(GGDC10S, Country == ctry, Variable, Year, AGR:SUM) fselect(dat, AGR:OTH) <- replace_outliers(dapply(fselect(dat, AGR:OTH), `*`, 1 / dat$SUM), 0, NA, "min") dat$SUM <- NULL dat$Variable <- recode_char(dat$Variable, VA = "Value Added Share", EMP = "Employment Share") dat <- melt(qDT(dat), 1:2, variable.name = "Sector", na.rm = TRUE) ggplot(aes(x = Year, y = value, fill = Sector), data = dat) + geom_area(position = "fill", alpha = 0.9) + labs(x = NULL, y = NULL) + theme_linedraw(base_size = 14) + facet_wrap( ~ Variable) + scale_fill_manual(values = sub("#00FF66", "#00CC66", rainbow(10))) + scale_x_continuous(breaks = scales::pretty_breaks(n = 7), expand = c(0, 0)) + scale_y_continuous(breaks = scales::pretty_breaks(n = 10), expand = c(0, 0), labels = scales::percent) + theme(axis.text.x = element_text(angle = 315, hjust = 0, margin = ggplot2::margin(t = 0)), strip.background = element_rect(colour = "grey20", fill = "grey20"), strip.text = element_text(face = "bold")) } plotGGDC("BWA") } } \keyword{datasets} collapse/man/small-helper.Rd0000644000176200001440000002034114174225763015516 0ustar liggesusers\name{small-helpers} \alias{AA3-small-helpers} \alias{small-helpers} \alias{.c} \alias{vlabels} \alias{vlabels<-} \alias{setLabels} \alias{vclasses} \alias{namlab} \alias{add_stub} \alias{rm_stub} \alias{\%!in\%} \alias{massign} \alias{\%=\%} \alias{ckmatch} \alias{all_identical} \alias{all_obj_equal} \alias{setRownames} \alias{setColnames} \alias{setDimnames} \alias{unattrib} \alias{setAttrib} \alias{copyAttrib} \alias{copyMostAttrib} \alias{is_categorical} \alias{is_date} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Small (Helper) Functions % \emph{collapse} } \description{ Convenience functions in the \emph{collapse} package that help to deal with object attributes such as variable names and labels, matching and object checking, and that improve the workflow. % Some functions are performance improved replacements for base R functions. % For recoding and replacing values see \code{\link{Recode}}. % for pairwise correlations and covariances see \code{\link{pwcor}}, for summary statistics see \code{\link{qsu}}. } \usage{ .c(\dots) # Non-standard concatenation i.e. .c(a, b) == c("a", "b") nam \%=\% values # Multiple-assignment e.g. .c(x, y) \%=\% c(1, 2), massign(nam, values, # can also assign to different environment. envir = parent.frame()) vlabels(X, attrn = "label", # Get labels of variables in X, in attr(X[[i]], attrn) use.names = TRUE) vlabels(X, attrn = "label") <- value # Set labels of variables in X (by reference) setLabels(X, value, attrn = "label", # Set labels of variables in X (by reference) cols = NULL) # and return X vclasses(X, use.names = TRUE) # Get classes of variables in X namlab(X, class = FALSE, # Return data frame of names and labels, attrn = "label", N = FALSE, # and (optionally) classes, number of observations Ndistinct = FALSE) # and number of non-missing distinct values add_stub(X, stub, pre = TRUE, # Add a stub (i.e. prefix or postfix) to column names cols = NULL) rm_stub(X, stub, pre = TRUE, # Remove stub from column names, also supports general regex = FALSE, # regex matching and removing of characters cols = NULL, ...) x \%!in\% table # The opposite of \%in\% ckmatch(x, table, # Check-match: throws an informative error if non-matched e = "Unknown columns:") all_identical(\dots) # Check exact equality of multiple objects or list-elements all_obj_equal(\dots) # Check near equality of multiple objects or list-elements setRownames(object, nm = if(is.atomic(object)) # Set rownames of object and return object seq_row(object) else NULL) setColnames(object, nm) # Set colnames of object and return object setDimnames(object, dn, which = NULL) # Set dimension names of object and return object unattrib(object) # Remove all attributes from object setAttrib(object, a) # Replace all attributes with list of attributes 'a' copyAttrib(to, from) # Copy all attributes from object 'from' to object 'to' copyMostAttrib(to, from) # Copy most attributes from object 'from' to object 'to' is_categorical(x) # The opposite of is.numeric is_date(x) # Check if object is of class "Date", "POSIXlt" or "POSIXct" } %- maybe also 'usage' for other objects documented here. \arguments{ \item{X}{a matrix or data frame (some functions also support vectors and arrays although that is less common).} \item{x, table}{a (atomic) vector.} \item{object, to, from}{a suitable R object.} \item{a}{a suitable list of attributes.} \item{attrn}{character. Name of attribute to store labels or retrieve labels from.} \item{N, Ndistinct}{logical. Options to display the number of observations or number of distinct non-missing values.} \item{value}{for \code{whichv} and \code{alloc}: a single value of any vector type. For \code{vlabels<-} and \code{setLabels}: a matching character vector or list of variable labels. } \item{use.names}{logical. Preserve names if \code{X} is a list. } \item{cols}{integer. (optional) indices of columns to apply the operation to. Note that for these small functions this needs to be integer, whereas for other functions in the package this argument is more flexible. } \item{class}{logical. Also show the classes of variables in X in a column?} \item{stub}{a single character stub, i.e. "log.", which by default will be pre-applied to all variables or column names in X.} \item{pre}{logical. \code{FALSE} will post-apply \code{stub}.} \item{regex}{logical. Match pattern anywhere in names using a regular expression and remove it with \code{\link{gsub}}.} \item{nm}{a suitable vector of row- or column-names.} \item{dn}{a suitable vector or list of names for dimension(s).} \item{which}{integer. If \code{NULL}, \code{dn} has to be a list fully specifying the dimension names of the object. Alternatively, a vector or list of names for dimensions \code{which} can be supplied. See Examples. } \item{e}{the error message thrown by \code{ckmatch} for non-matched elements. The message is followed by the comma-separated non-matched elements.} \item{nam}{character. A vector of object names.} \item{values}{a matching atomic vector or list of objects.} \item{envir}{the environment to assign into.} \item{\dots}{for \code{.c}: Comma-separated expressions. For \code{all_identical / all_obj_equal}: Either multiple comma-separated objects or a single list of objects in which all elements will be checked for exact / numeric equality. For \code{rm_stub}: further arguments passed to \code{\link{gsub}}.} } \details{ \code{copyAttrib} and \code{copyMostAttrib} take a shallow copy of the attribute list, i.e. they don't duplicate in memory the attributes themselves. They also, along with \code{setAttrib}, take a shallow copy of lists passed to the \code{to} argument, so that lists are not modified by reference. Atomic \code{to} arguments are however modified by reference. \code{copyMostAttrib} copies all attributes except for \code{"names"}, \code{"dim"} and \code{"dimnames"} (like the corresponding C-API function), and further only copies the \code{"row.names"} attribute of data frames if known to be valid. Thus it is a suitable choice if objects should be of the same type but are not of equal dimensions. } % \value{ %% ~Describe the value returned %% If it is a LIST, use %% \item{comp1 }{Description of 'comp1'} %% \item{comp2 }{Description of 'comp2'} %% ... % } % \references{ %% ~put references to the literature/web site here ~ % } % \author{ %% ~~who you are~~ % } % \note{ %% ~~further notes~~ % } %% ~Make other sections like Warning with \section{Warning }{....} ~ \seealso{ \link[=efficient-programming]{Efficient Programming}, \link[=collapse-documentation]{Collapse Overview} %% \code{\link{pwcor}}, \code{\link{qsu}} } \examples{ ## Non-standard concatenation .c(a, b, "c d", e == f) ## Multiple assignment .c(a, b) \%=\% list(1, 2) .c(T, N) \%=\% dim(EuStockMarkets) names(iris) \%=\% iris list2env(iris) # Same thing rm(list = c("a", "b", "T", "N", names(iris))) ## Variable labels namlab(wlddev) namlab(wlddev, class = TRUE, N = TRUE, Ndistinct = TRUE) vlabels(wlddev) vlabels(wlddev) <- vlabels(wlddev) ## Stub-renaming log_mtc <- add_stub(log(mtcars), "log.") head(log_mtc) head(rm_stub(log_mtc, "log.")) rm(log_mtc) ## Setting dimension names of an object head(setRownames(mtcars)) ar <- array(1:9, c(3,3,3)) setRownames(ar) setColnames(ar, c("a","b","c")) setDimnames(ar, c("a","b","c"), which = 3) setDimnames(ar, list(c("d","e","f"), c("a","b","c")), which = 2:3) setDimnames(ar, list(c("g","h","i"), c("d","e","f"), c("a","b","c"))) ## Checking exact equality of multiple objects all_identical(iris, iris, iris, iris) l <- replicate(100, fmean(num_vars(iris), iris$Species), simplify = FALSE) all_identical(l) rm(l) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{utilities} % __ONLY ONE__ keyword per line % use one of RShowDoc("KEYWORDS") \keyword{attribute} \keyword{misc} \keyword{documentation} collapse/man/group.Rd0000644000176200001440000000434514170153773014270 0ustar liggesusers\name{group} \alias{group} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Fast Hash-Based Grouping } \description{ \code{group()} scans the rows of a data frame (or atomic vector / list of atomic vectors), assigning to each unique row an integer id - starting with 1 and proceeding in first-appearance order of the rows. The function is written in C and optimized for R's data structures. It is the workhorse behind functions like \code{\link{GRP}} / \code{\link{fgroup_by}}, \code{\link{collap}}, \code{\link{qF}}, \code{\link{qG}}, \code{\link{finteraction}} and \code{\link{funique}}, when called with argument \code{sort = FALSE}. } \usage{ group(x, starts = FALSE, group.sizes = FALSE) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{x}{an atomic vector or data frame / list of equal-length atomic vectors. } \item{starts}{logical. If \code{TRUE}, an additional attribute 'starts' is attached giving a vector of group starts (index of first-occurrence of unique rows). } \item{group.sizes}{ logical. If \code{TRUE}, an additional attribute 'group.sizes' is attached giving the size of each group. } } \details{ A data frame is grouped on a column-by-column basis, starting from the leftmost column. For each new column the grouping vector obtained after the previous column is also fed back into the hash function so that unique values are determined on a running basis. The algorithm terminates as soon as the number of unique rows reaches the size of the data frame. Missing values are also grouped just like any other values. Invoking arguments \code{starts} and/or \code{group.sizes} requires an additional pass through the final grouping vector. } \value{ An object is of class 'qG' see \code{\link{qG}}. } \author{ The Hash Function was taken from the excellent \emph{kit} package by Morgan Jacob. } %% ~Make other sections like Warning with \section{Warning }{....} ~ \seealso{ \link[=fast-grouping-ordering]{Fast Grouping and Ordering}, \link[=collapse-documentation]{Collapse Overview} } \examples{ # Let's replicate what funique does g <- group(wlddev, starts = TRUE) if(attr(g, "N.groups") == fnrow(wlddev)) wlddev else ss(wlddev, attr(g, "starts")) } collapse/man/ftransform.Rd0000644000176200001440000003665314170152324015313 0ustar liggesusers\name{ftransform} \alias{fmutate} \alias{mtt} \alias{ftransform} \alias{tfm} \alias{ftransformv} \alias{tfmv} \alias{ftransform<-} \alias{tfm<-} \alias{settransform} \alias{settfm} \alias{settransformv} \alias{settfmv} \alias{fcompute} \alias{fcomputev} \title{Fast Transform and Compute Columns on a Data Frame} \description{ \code{ftransform} is a much faster version of \code{\link{transform}} for data frames. It returns the data frame with new columns computed and/or existing columns modified or deleted. \code{settransform} does all of that by reference. \code{fcompute} computes and returns new columns. These functions evaluate all arguments simultaneously, allow list-input (nested pipelines) and natively disregard grouped data. Catering to the \emph{tidyverse} user, v1.7.0 introduced \code{fmutate}, providing familiar functionality i.e. arguments are evaluated sequentially, computation on grouped data is done by groups, and functions can be applied to multiple columns using \code{\link{across}}. See also the Details. } \usage{ # Modify and return data frame ftransform(.data, \dots) ftransformv(.data, vars, FUN, \dots, apply = TRUE) tfm(.data, \dots) # Shortcut for ftransform tfmv(.data, vars, FUN, \dots, apply = TRUE) # Modify data frame by reference settransform(.data, \dots) settransformv(.data, vars, FUN, \dots, apply = TRUE) settfm(.data, \dots) # Shortcut for settransform settfmv(.data, vars, FUN, \dots, apply = TRUE) # Replace/add modified columns in/to a data frame ftransform(.data) <- value tfm(.data) <- value # Shortcut for ftransform<- # Compute columns, returned as a new data frame fcompute(.data, \dots, keep = NULL) fcomputev(.data, vars, FUN, \dots, apply = TRUE, keep = NULL) # New: dplyr-style mutate (sequential evaluation + across() feature) fmutate(.data, ..., .keep = "all") mtt(.data, ..., .keep = "all") # Shortcut for fmutate } \arguments{ \item{.data}{a data frame or named list of columns.} \item{\dots}{further arguments of the form \code{column = value}. The \code{value} can be a combination of other columns, a scalar value, or \code{NULL}, which deletes \code{column}. Alternatively it is also possible to place a single list here, which will be treated like a list of \code{column = value} arguments. For \code{ftransformv} and \code{fcomputev}, \code{\dots} can be used to pass further arguments to \code{FUN}. \emph{Note:} The ellipsis (\code{\dots}) is always evaluated within the data frame (\code{.data}) environment. See Examples. \code{fmutate} supports \code{\link{across}} statements, and evaluates tagged vector expressions sequentially. } \item{vars}{variables to be transformed by applying \code{FUN} to them: select using names, indices, a logical vector or a selector function (e.g. \code{is.numeric}). Since v1.7 \code{vars} is evaluated within the \code{.data} environment, permitting expressions on columns e.g. \code{c(col1, col3:coln)}.} \item{FUN}{a single function yielding a result of length \code{NROW(.data)} or 1. See also \code{apply}.} \item{apply}{logical. \code{TRUE} (default) will apply \code{FUN} to each column selected in \code{vars}; \code{FALSE} will apply \code{FUN} to the subsetted data frame i.e. \code{FUN(get_vars(.data, vars), ...)}. The latter is useful for \emph{collapse} functions with data frame or grouped / panel data frame methods, yielding performance gains and enabling grouped transformations. See Examples.} \item{value}{a named list of replacements, it will be treated like an evaluated list of \code{column = value} arguments.} \item{keep}{select columns to preserve using column names, indices or a function (e.g. \code{is.numeric}). By default computed columns are added after the preserved ones, unless they are assigned the same name in which case the preserved columns will be replaced in order.} \item{.keep}{Either one of \code{"all", "used", "unused"} or \code{"none"} (see \code{\link[dplyr]{mutate}}), or columns names/indices/function as \code{keep}.} } \details{ The \code{\dots} arguments to \code{ftransform} are tagged vector expressions, which are evaluated in the data frame \code{.data}. The tags are matched against \code{names(.data)}, and for those that match, the values replace the corresponding variable in \code{.data}, whereas the others are appended to \code{.data}. It is also possible to delete columns by assigning \code{NULL} to them, i.e. \code{ftransform(data, colk = NULL)} removes \code{colk} from the data. \emph{Note} that \code{names(.data)} and the names of the \code{...} arguments are checked for uniqueness beforehand, yielding an error if this is not the case. Since \emph{collapse} v1.3.0, is is also possible to pass a single named list to \code{\dots}, i.e. \code{ftransform(data, newdata)}. This list will be treated like a list of tagged vector expressions. \emph{Note} the different behavior: \code{ftransform(data, list(newcol = col1))} is the same as \code{ftransform(data, newcol = col1)}, whereas \code{ftransform(data, newcol = as.list(col1))} creates a list column. Something like \code{ftransform(data, as.list(col1))} gives an error because the list is not named. See Examples. % and \code{ftransform(data, as.list(col1))} gives an error because an unnamed list is passed. % , but \code{ftransform(data, setNames(as.list(col1), col1))} will work and add the values of \code{col1} as separate columns. % \code{ftransform(data, fmean(list(col1mean = col1, col2mean = col2), drop = FALSE))} etc. % For example \code{ftransformv(data, 1:3, log)} is the same as \code{ftransform(data, lapply(get_vars(data, 1:3), log))}, and \code{ftransformv(data, 1:3, log, apply = FALSE)} is the same as \code{ftransform(data, log(get_vars(data, 1:3)))}. The function \code{ftransformv} added in v1.3.2 provides a fast replacement for the functions \code{dplyr::mutate_at} and \code{dplyr::mutate_if} (without the grouping feature) facilitating mutations of groups of columns (\code{dplyr::mutate_all} is already accounted for by \code{\link{dapply}}). See Examples. The function \code{settransform} does all of that by reference, but uses base-R's copy-on modify semantics, which is equivalent to replacing the data with \code{<-} (thus it is still memory efficient but the data will have a different memory address afterwards). The function \code{fcompute(v)} works just like \code{ftransform(v)}, but returns only the changed / computed columns without modifying or appending the data in \code{.data}. See Examples. The function \code{fmutate} added in v1.7.0, provides functionality familiar from \emph{dplyr} 1.0.0 and higher. It evaluates tagged vector expressions sequentially and does operations by groups on a grouped frame (thus it is slower than \code{ftransform} if you have many tagged expressions or a grouped data frame). Note however that \emph{collapse} does not depend on \emph{rlang}, so fancy things like data masking or lambda expressions are not available. \emph{Note also} that \code{fmutate} operates differently on grouped data whether you use \code{.FAST_FUN} or base R functions / functions from other packages. With \code{.FAST_FUN} (including \code{.OPERATOR_FUN}, excluding \code{fhdbetween} / \code{fhdwithin} / \code{HDW} / \code{HDB}), \code{fmutate} performs an efficient vectorized execution, i.e. the grouping object from the grouped data frame is passed to the \code{g} argument of these functions, and for \code{.FAST_STAT_FUN} also \code{TRA = "replace_fill"} is set (if not overwritten by the user), yielding internal grouped computation by these functions without the need for splitting the data by groups. For base R and other functions, \code{fmutate} performs classical split-apply combine computing i.e. the relevant columns of the data are selected and split into groups, the expression is evaluated for each group, and the result is recombined and suitably expanded to match the original data frame. \bold{Note} that it is not possible to mix vectorized and standard execution in the same expression!! Vectorized execution is performed if \bold{any} \code{.FAST_FUN} or \code{.OPERATOR_FUN} is part of the expression, thus a code like \code{mtcars |> gby(cyl) |> fmutate(new = fmin(mpg) / min(mpg))} will be expanded to something like \code{mtcars \%>\% gby(cyl) \%>\% ftransform(new = fmin(mpg, g = GRP(.), TRA = "replace_fill") / min(mpg))} and then executed, i.e. \code{fmin(mpg)} will be executed in a vectorized way, and \code{min(mpg)} will not be executed by groups at all. } \note{ \code{ftransform} does not do anything per se with a grouped data frame. This is on purpose as it affords greater flexibility and performance in programming with the \code{.FAST_FUN}, through which \emph{collapse} supports \emph{various kinds} of fully vectorized grouped transformations (see \code{\link{TRA}} for a list of available transformations). In particular, you can run a nested pipeline inside \code{ftransform}, and decide which expressions should be grouped, and you can use the ad-hoc grouping functionality of the \code{.FAST_FUN}, allowing operations where different groupings are applied simultaneously in an expression. See Examples or the answer provided \href{https://stackoverflow.com/questions/67349744/using-ftransform-along-with-fgroup-by-from-collapse-r-package}{here}. \code{fmutate} on the other hand supports grouped operations just like \code{dplyr::mutate}, but works in two different ways depending on whether you use \code{.FAST_FUN} in an expression or other functions. See the Examples section of \code{\link{fsummarise}} for an illustration. } \value{ The modified data frame \code{.data}, or, for \code{fcompute}, a new data frame with the columns computed on \code{.data}. All attributes of \code{.data} are preserved. } \seealso{ \code{\link{across}}, \code{\link{fsummarise}}, \link[=fast-data-manipulation]{Data Frame Manipulation}, \link[=collapse-documentation]{Collapse Overview} } \examples{ ## ftransform modifies and returns a data.frame head(ftransform(airquality, Ozone = -Ozone)) head(ftransform(airquality, new = -Ozone, Temp = (Temp-32)/1.8)) head(ftransform(airquality, new = -Ozone, new2 = 1, Temp = NULL)) # Deleting Temp head(ftransform(airquality, Ozone = NULL, Temp = NULL)) # Deleting columns # With collapse's grouped and weighted functions, complex operations are done on the fly head(ftransform(airquality, # Grouped operations by month: Ozone_Month_median = fmedian(Ozone, Month, TRA = "replace_fill"), Ozone_Month_sd = fsd(Ozone, Month, TRA = "replace"), Ozone_Month_centered = fwithin(Ozone, Month))) # Grouping by month and above/below average temperature in each month head(ftransform(airquality, Ozone_Month_high_median = fmedian(Ozone, list(Month, Temp > fbetween(Temp, Month)), TRA = "replace_fill"))) ## ftransformv can be used to modify multiple columns using a function head(ftransformv(airquality, 1:3, log)) head(`[<-`(airquality, 1:3, value = lapply(airquality[1:3], log))) # Same thing in base R head(ftransformv(airquality, 1:3, log, apply = FALSE)) head(`[<-`(airquality, 1:3, value = log(airquality[1:3]))) # Same thing in base R # Using apply = FALSE yields meaningful performance gains with collapse functions # This calls fwithin.default, and repeates the grouping by month 3 times: head(ftransformv(airquality, 1:3, fwithin, Month)) # This calls fwithin.data.frame, and only groups one time -> 5x faster! head(ftransformv(airquality, 1:3, fwithin, Month, apply = FALSE)) library(magrittr) # Pipe operators # This also works for grouped and panel data frames (calling fwithin.grouped_df) airquality \%>\% fgroup_by(Month) \%>\% ftransformv(1:3, fwithin, apply = FALSE) \%>\% head # But this gives the WRONG result (calling fwithin.default). Need option apply = FALSE!! airquality \%>\% fgroup_by(Month) \%>\% ftransformv(1:3, fwithin) \%>\% head # For grouped modification of single columns in a grouped dataset, we can use GRP(): airquality \%>\% fgroup_by(Month) \%>\% ftransform(W_Ozone = fwithin(Ozone, GRP(.)), # Grouped centering sd_Ozone_m = fsd(Ozone, GRP(.), TRA = "replace"), # In-Month standard deviation sd_Ozone = fsd(Ozone, TRA = "replace"), # Overall standard deviation sd_Ozone2 = fsd(Ozone, TRA = "replace_fill"), # Same, overwriting NA's sd_Ozone3 = fsd(Ozone)) \%>\% head # Same thing (calling alloc()) rm(airquality) ## For more complex mutations we can use ftransform with compound pipes airquality \%>\% fgroup_by(Month) \%>\% ftransform(get_vars(., 1:3) \%>\% fwithin \%>\% flag(0:2)) \%>\% head airquality \%>\% ftransform(STD(., cols = 1:3) \%>\% replace_NA(0)) \%>\% head # The list argument feature also allows flexible operations creating multiple new columns airquality \%>\% # The variance of Wind and Ozone, by month, weighted by temperature: ftransform(fvar(list(Wind_var = Wind, Ozone_var = Ozone), Month, Temp, "replace")) \%>\% head # Same as above using a grouped data frame (a bit more complex) airquality \%>\% fgroup_by(Month) \%>\% ftransform(fselect(., Wind, Ozone) \%>\% fvar(Temp, "replace") \%>\% add_stub("_var", FALSE)) \%>\% fungroup \%>\% head # This performs 2 different multi-column grouped operations (need c() to make it one list) ftransform(airquality, c(fmedian(list(Wind_Day_median = Wind, Ozone_Day_median = Ozone), Day, TRA = "replace"), fsd(list(Wind_Month_sd = Wind, Ozone_Month_sd = Ozone), Month, TRA = "replace"))) \%>\% head ## settransform(v) works like ftransform(v) but modifies a data frame in the global environment.. settransform(airquality, Ratio = Ozone / Temp, Ozone = NULL, Temp = NULL) head(airquality) rm(airquality) # Grouped and weighted centering settransformv(airquality, 1:3, fwithin, Month, Temp, apply = FALSE) head(airquality) rm(airquality) # Suitably lagged first-differences settransform(airquality, get_vars(airquality, 1:3) \%>\% fdiff \%>\% flag(0:2)) head(airquality) rm(airquality) # Same as above using magrittr::`\%<>\%` airquality \%<>\% ftransform(get_vars(., 1:3) \%>\% fdiff \%>\% flag(0:2)) head(airquality) rm(airquality) # It is also possible to achieve the same thing via a replacement method (if needed) ftransform(airquality) <- get_vars(airquality, 1:3) \%>\% fdiff \%>\% flag(0:2) head(airquality) rm(airquality) ## fcompute only returns the modified / computed columns head(fcompute(airquality, Ozone = -Ozone)) head(fcompute(airquality, new = -Ozone, Temp = (Temp-32)/1.8)) head(fcompute(airquality, new = -Ozone, new2 = 1)) # Can preserve existing columns, computed ones are added to the right if names are different head(fcompute(airquality, new = -Ozone, new2 = 1, keep = 1:3)) # If given same name as preserved columns, preserved columns are replaced in order... head(fcompute(airquality, Ozone = -Ozone, new = 1, keep = 1:3)) # Same holds for fcomputev head(fcomputev(iris, is.numeric, log)) # Same as: iris \%>\% get_vars(is.numeric) \%>\% dapply(log) \%>\% head() head(fcomputev(iris, is.numeric, log, keep = "Species")) # Adds in front head(fcomputev(iris, is.numeric, log, keep = names(iris))) # Preserve order # Keep a subset of the data, add standardized columns head(fcomputev(iris, 3:4, STD, apply = FALSE, keep = names(iris)[3:5])) } \keyword{manip} collapse/man/fcumsum.Rd0000644000176200001440000001225314172533272014606 0ustar liggesusers\name{fcumsum} \alias{fcumsum} \alias{fcumsum.default} \alias{fcumsum.matrix} \alias{fcumsum.data.frame} \alias{fcumsum.pseries} \alias{fcumsum.pdata.frame} \alias{fcumsum.grouped_df} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Fast (Grouped, Ordered) Cumulative Sum for Matrix-Like Objects } \description{ \code{fcumsum} is a generic function that computes the (column-wise) cumulative sum of \code{x}, (optionally) grouped by \code{g} and/or ordered by \code{o}. Several options to deal with missing values are provided. } \usage{ fcumsum(x, \dots) \method{fcumsum}{default}(x, g = NULL, o = NULL, na.rm = TRUE, fill = FALSE, check.o = TRUE, \dots) \method{fcumsum}{matrix}(x, g = NULL, o = NULL, na.rm = TRUE, fill = FALSE, check.o = TRUE, \dots) \method{fcumsum}{data.frame}(x, g = NULL, o = NULL, na.rm = TRUE, fill = FALSE, check.o = TRUE, \dots) # Methods for compatibility with plm: \method{fcumsum}{pseries}(x, na.rm = TRUE, fill = FALSE, \dots) \method{fcumsum}{pdata.frame}(x, na.rm = TRUE, fill = FALSE, \dots) # Methods for grouped data frame / compatibility with dplyr: \method{fcumsum}{grouped_df}(x, o = NULL, na.rm = TRUE, fill = FALSE, check.o = TRUE, keep.ids = TRUE, \dots) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{x}{a vector / time series, matrix, data frame, panel series (\code{plm::pseries}), panel data frame (\code{plm::pdata.frame}) or grouped data frame (class 'grouped_df').} \item{g}{a factor, \code{\link{GRP}} object, atomic vector (internally converted to factor) or a list of vectors / factors (internally converted to a \code{\link{GRP}} object) used to group \code{x}.} \item{o}{a vector or list of vectors providing the order in which the elements of \code{x} are cumulatively summed. Will be passed to \code{\link{radixorderv}} unless \code{check.o = FALSE}.} \item{na.rm}{logical. Skip missing values in \code{x}. Defaults to \code{TRUE} and implemented at very little computational cost.} \item{fill}{if \code{na.rm = TRUE}, setting \code{fill = TRUE} will overwrite missing values with the previous value of the cumulative sum, starting from 0.} \item{check.o}{logical. Programmers option: \code{FALSE} prevents passing \code{o} to \code{\link{radixorderv}}, requiring \code{o} to be a valid ordering vector that is integer typed with each element in the range \code{[1, length(x)]}. This gives some extra speed, but will terminate R if any element of \code{o} is too large or too small. } \item{keep.ids}{\emph{pdata.frame / grouped_df methods}: Logical. Drop all identifiers from the output (which includes all grouping variables and variables passed to\code{o}). \emph{Note}: For grouped / panel data frames identifiers are dropped, but the 'groups' / 'index' attributes are kept.} \item{\dots}{arguments to be passed to or from other methods.} } \details{ If \code{na.rm = FALSE}, \code{fcumsum} works like \code{\link{cumsum}} and propagates missing values. The default \code{na.rm = TRUE} skips missing values and computes the cumulative sum on the non-missing values. Missing values are kept. If \code{fill = TRUE}, missing values are replaced with the previous value of the cumulative sum (starting from 0), computed on the non-missing values. By default the cumulative sum is computed in the order in which elements appear in \code{x}. If \code{o} is provided, the cumulative sum is computed in the order given by \code{radixorderv(o)}, without the need to first sort \code{x}. This applies as well if groups are used (\code{g}), in which the cumulative sum is computed separately in each group. The \emph{pseries} and \emph{pdata.frame} methods assume that the last factor in the \code{plm::index} is the time-variable and the rest are grouping variables. The time-variable is passed to \code{radixorderv} and used for ordered computation, so that cumulative sums are accurately computed regardless of whether the panel-data is ordered or balanced. \code{fcumsum} explicitly supports integers. Integers in R are bounded at bounded at +-2,147,483,647, and an integer overflow error will be provided if the cumulative sum (within any group) exceeds +-2,147,483,647. } \value{ the cumulative sum of values in \code{x}, (optionally) grouped by \code{g} and/or ordered by \code{o}. See Details and Examples. } \seealso{ \code{\link{fdiff}}, \code{\link{fgrowth}}, \link[=time-series-panel-series]{Time Series and Panel Series}, \link[=collapse-documentation]{Collapse Overview} } \examples{ ## Non-grouped fcumsum(AirPassengers) head(fcumsum(EuStockMarkets)) fcumsum(mtcars) # Non-grouped but ordered o <- order(rnorm(nrow(EuStockMarkets))) all.equal(copyAttrib(fcumsum(EuStockMarkets[o, ], o = o)[order(o), ], EuStockMarkets), fcumsum(EuStockMarkets)) ## Grouped head(with(wlddev, fcumsum(PCGDP, iso3c))) ## Grouped and ordered head(with(wlddev, fcumsum(PCGDP, iso3c, year))) head(with(wlddev, fcumsum(PCGDP, iso3c, year, fill = TRUE))) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{manip} % __ONLY ONE__ keyword per line % use one of RShowDoc("KEYWORDS") \keyword{ts} % __ONLY ONE__ keyword per line collapse/man/rsplit.Rd0000644000176200001440000000615214167160635014450 0ustar liggesusers\name{rsplit} \alias{rsplit} \alias{rsplit.default} \alias{rsplit.data.frame} \title{ Recursive Splitting } \description{ \code{rsplit} recursively splits a vector or data frame into subsets according to combinations of (multiple) vectors / factors - by default returning a (nested) list. If \code{flatten = TRUE}, the list is flattened yielding the same result as \code{\link{split}}. \code{rsplit} is implemented as a wrapper around \code{\link{gsplit}}, and faster than \code{\link{split}}. } \usage{ rsplit(x, \dots) \method{rsplit}{default}(x, fl, drop = TRUE, flatten = FALSE, use.names = TRUE, \dots) \method{rsplit}{data.frame}(x, by, drop = TRUE, flatten = FALSE, cols = NULL, keep.by = FALSE, simplify = TRUE, use.names = TRUE, \dots) } \arguments{ \item{x}{a vector, data.frame or list.} \item{fl}{a vector / factor, \code{\link{GRP}} object, or list of vectors / factors used to split \code{x}.} \item{by}{\emph{data.frame method}: Same as \code{fl}, but also allows one- or two-sided formulas i.e. \code{~ group1} or \code{var1 + var2 ~ group1 + group2}. See Examples.} \item{drop}{logical. \code{TRUE} removes unused levels or combinations of levels from factors before splitting; \code{FALSE} retains those combinations yielding empty list elements in the output.} \item{flatten}{logical. If \code{fl} is a list of vectors / factors, \code{TRUE} calls \code{\link{GRP}} on the list, creating a single grouping used for splitting; \code{FALSE} yields recursive splitting.} \item{use.names}{logical. \code{TRUE} returns a named list (like \code{\link{split}}); \code{FALSE} returns a plain list.} \item{cols}{\emph{data.frame method}: Select columns to split using a function, column names, indices or a logical vector. \emph{Note}: \code{cols} is ignored if a two-sided formula is passed to \code{by}.} \item{keep.by}{logical. If a formula is passed to \code{by}, then \code{TRUE} preserves the splitting (right-hand-side) variables in the data frame.} \item{simplify}{\emph{data.frame method}: Logical. \code{TRUE} calls \code{rsplit.default} if a single column is split e.g. \code{rsplit(data, col1 ~ group1)} becomes the same as \code{rsplit(data$col1, data$group1)}.} \item{\dots}{further arguments passed to \code{\link{GRP}}. Sensible choices would be \code{sort = FALSE}, \code{decreasing = TRUE} or \code{na.last = FALSE}. Note that these options only apply if \code{fl} is not already a factor.} } \value{ a (nested) list containing the subsets of \code{x}. } \seealso{ \code{\link{gsplit}}, \code{\link{rapply2d}}, \code{\link{unlist2d}}, \link[=list-processing]{List Processing}, \link[=collapse-documentation]{Collapse Overview} } \examples{ rsplit(mtcars$mpg, mtcars$cyl) rsplit(mtcars, mtcars$cyl) rsplit(mtcars, mtcars[.c(cyl, vs, am)]) rsplit(mtcars, ~ cyl + vs + am, keep.by = TRUE) # Same thing rsplit(mtcars, ~ cyl + vs + am) rsplit(mtcars, ~ cyl + vs + am, flatten = TRUE) rsplit(mtcars, mpg ~ cyl) rsplit(mtcars, mpg ~ cyl, simplify = FALSE) rsplit(mtcars, mpg + hp ~ cyl + vs + am) rsplit(mtcars, mpg + hp ~ cyl + vs + am, keep.by = TRUE) } \keyword{manip} collapse/man/collap.Rd0000644000176200001440000004535214176656475014426 0ustar liggesusers\name{collap} \alias{advanced-aggregation} \alias{collap} \alias{collapv} \alias{collapg} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Advanced Data Aggregation } \description{ \code{collap} is a fast and easy to use multi-purpose data aggregation command. It performs simple aggregations, multi-type data aggregations applying different functions to numeric and categorical data, weighted aggregations, multi-function aggregations applying multiple functions to each column, and fully customized aggregations where the user passes a list mapping functions to columns. \code{collap} works with \emph{collapse}'s \link[=fast-statistical-functions]{Fast Statistical Functions}, providing extremely fast conventional and weighted aggregation. It also works with other functions but this does not deliver high speeds on large data and does not support weighted aggregations. % \code{collap} supports formula and data (i.e. grouping vectors or lists of vectors) input to \code{by}, whereas \code{collapv} allows names and indices of grouping columns to be passed to \code{by}. } \usage{ # Main function: allows formula and data input to `by` and `w` arguments collap(X, by, FUN = fmean, catFUN = fmode, cols = NULL, w = NULL, wFUN = fsum, custom = NULL, keep.by = TRUE, keep.w = TRUE, keep.col.order = TRUE, sort = TRUE, decreasing = FALSE, na.last = TRUE, return.order = sort, method = "auto", parallel = FALSE, mc.cores = 2L, return = c("wide","list","long","long_dupl"), give.names = "auto", \dots) # Programmer function: allows column names and indices input to `by` and `w` arguments collapv(X, by, FUN = fmean, catFUN = fmode, cols = NULL, w = NULL, wFUN = fsum, custom = NULL, keep.by = TRUE, keep.w = TRUE, keep.col.order = TRUE, sort = TRUE, decreasing = FALSE, na.last = TRUE, return.order = sort, method = "auto", parallel = FALSE, mc.cores = 2L, return = c("wide","list","long","long_dupl"), give.names = "auto", \dots) # Auxiliary function: for grouped data ('grouped_df') input + non-standard evaluation collapg(X, FUN = fmean, catFUN = fmode, cols = NULL, w = NULL, wFUN = fsum, custom = NULL, keep.group_vars = TRUE, keep.w = TRUE, keep.col.order = TRUE, parallel = FALSE, mc.cores = 2L, return = c("wide","list","long","long_dupl"), give.names = "auto", \dots) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{X}{a data frame, or an object coercible to data frame using \code{\link{qDF}}.} \item{by}{for \code{collap}: a one-or two sided formula, i.e. \code{~ group1} or \code{var1 + var2 ~ group1 + group2}, or a atomic vector, list of vectors or \code{\link{GRP}} object used to group \code{X}. For \code{collapv}: names or indices of grouping columns, or a logical vector or selector function such as \code{\link{is_categorical}} selecting grouping columns.} \item{FUN}{a function, list of functions (i.e. \code{list(fsum, fmean, fsd)} or \code{list(myfun1 = function(x).., sd = sd)}), or a character vector of function names, which are automatically applied only to numeric variables.} \item{catFUN}{same as \code{FUN}, but applied only to categorical (non-numeric) typed columns (\code{\link{is_categorical}}).} \item{cols}{select columns to aggregate using a function, column names, indices or logical vector. \emph{Note}: \code{cols} is ignored if a two-sided formula is passed to \code{by}.} \item{w}{weights. Can be passed as numeric vector or alternatively as formula i.e. \code{~ weightvar} in \code{collap} or column name / index etc. i.e. \code{"weightvar"} in \code{collapv}. \code{collapg} supports non-standard evaluations so \code{weightvar} can be indicated without quotes if found in \code{X}. } \item{wFUN}{same as \code{FUN}: Function(s) to aggregate weight variable if \code{keep.w = TRUE}. By default the sum of the weights is computed in each group.} \item{custom}{a named list specifying a fully customized aggregation task. The names of the list are function names and the content columns to aggregate using this function (same input as \code{cols}). For example \code{custom = list(fmean = 1:6, fsd = 7:9, fmode = 10:11)} tells \code{collap} to aggregate columns 1-6 of \code{X} using the mean, columns 7-9 using the standard deviation etc. \emph{Notes}: \code{custom} lets \code{collap} ignore any inputs passed to \code{FUN}, \code{catFUN} or \code{cols}. Since v1.6.0 you can also rename columns e.g. \code{custom = list(fmean = c(newname = "col1", "col2"), fmode = c(newname = 3))}.} \item{keep.by, keep.group_vars}{logical. \code{FALSE} will omit grouping variables from the output. \code{TRUE} keeps the variables, even if passed externally in a list or vector (unlike other \emph{collapse} functions).} \item{keep.w}{logical. \code{FALSE} will omit weight variable from the output i.e. no aggregation of the weights. \code{TRUE} aggregates and adds weights, even if passed externally as a vector (unlike other \emph{collapse} functions).} \item{keep.col.order}{logical. Retain original column order post-aggregation.} \item{sort, decreasing, na.last, return.order, method}{logical / character. Arguments passed to \code{\link{GRP.default}} and affecting the row-order in the aggregated data frame and the grouping algorithm.} \item{parallel}{logical. Use \code{\link{mclapply}} instead of \code{lapply} to parallelize the computation at the column level. Not available for Windows.} \item{mc.cores}{integer. Argument to \code{\link{mclapply}} setting the number of cores to use, default is 2.} \item{return}{character. Control the output format when aggregating with multiple functions or performing custom aggregation. "wide" (default) returns a wider data frame with added columns for each additional function. "list" returns a list of data frames - one for each function. "long" adds a column "Function" and row-binds the results from different functions using \code{data.table::rbindlist}. "long.dupl" is a special option for aggregating multi-type data using multiple \code{FUN} but only one \code{catFUN} or vice-versa. In that case the format is long and data aggregated using only one function is duplicated. See Examples.} \item{give.names}{logical. Create unique names of aggregated columns by adding a prefix 'FUN.var'. \code{'auto'} will automatically create such prefixes whenever multiple functions are applied to a column. % By default \code{"."} is used as a separator between 'FUN' and 'var'. It is also possible to choose a different separator by specifying \code{give.names = "_"}, for example. } % \item{sort.row}{depreciated, renamed to \code{sort}.} \item{\dots}{additional arguments passed to all functions supplied to \code{FUN}, \code{catFUN}, \code{wFUN} or \code{custom}. The behavior of \link[=fast-statistical-functions]{Fast Statistical Functions} is regulated by \code{option("collapse_unused_arg_action")} and defaults to \code{"warning"}. } } \details{ \code{collap} automatically checks each function passed to it whether it is a \link[=fast-statistical-functions]{Fast Statistical Function} (i.e. whether the function name is contained in \code{.FAST_STAT_FUN}). If the function is a fast statistical function, \code{collap} only does the grouping and then calls the function to carry out the grouped computations. If the function is not one of \code{.FAST_STAT_FUN}, \code{\link{BY}} is called internally to perform the computation. The resulting computations from each function are put into a list and recombined to produce the desired output format as controlled by the \code{return} argument. When setting \code{parallel = TRUE} on a non-windows computer, aggregations will efficiently be parallelized at the column level using \code{\link{mclapply}} utilizing \code{mc.cores} cores. } \value{ \code{X} aggregated. If \code{X} is not a data frame it is coerced to one using \code{\link{qDF}} and then aggregated. } % \references{ %% ~put references to the literature/web site here ~ % } % \author{ %% ~~who you are~~ %} \note{ (1) Additional arguments passed are not split by groups. Weighted aggregations with user defined functions should be done with \code{\link{fsummarise}}, or using the \emph{data.table} package. (2) When the \code{w} argument is used, the weights are passed to all \link[=fast-statistical-functions]{Fast Statistical Functions}. This may be undesirable in settings like \code{collapse::collap(data, ~ id, custom = list(fsum = ..., fmean = ...), w = ~ weights)} where we wish to aggregate some columns using the weighted mean, and others using a simple sum or another unweighted statistic. %Since many \link[=fast-statistical-functions]{Fast Statistical Functions} including \code{\link{fsum}} support weights, the above computes a weighted mean and a weighted sum. A couple of workarounds were outlined \href{https://github.com/SebKrantz/collapse/issues/96}{here}, but \emph{collapse} 1.5.0 incorporates an easy solution into \code{collap}: Therefore it is possible to append \link[=fast-statistical-functions]{Fast Statistical Functions} by \code{_uw} to yield an unweighted computation. So for the above example we can write: \code{collapse::collap(data, ~ id, custom = list(fsum_uw = ..., fmean = ...), w = ~ weights)} to get the weighted mean and the simple sum. \emph{Note} that the \code{_uw} functions are not available for use outside collap. Thus one also needs to quote them when passing to the \code{FUN} or \code{catFUN} arguments, e.g. use \code{collap(data, ~ id, fmean, "fmode_uw", w = ~ weighs)}. \emph{Note} also that it is never necessary for functions passed to \code{wFUN} to be appended like this, as the weights are never used to aggregate themselves. (3) The dispatch between using optimized \link[=fast-statistical-functions]{Fast Statistical Functions} performing grouped computations internally or calling \code{BY} to perform split-apply-combine computing is done by matching the function name against \code{.FAST_STAT_FUN}. Thus code like \code{collapse::collap(data, ~ id, collapse::fmedian)} does not yield an optimized computation, as \code{"collapse::fmedian" \%!in\% .FAST_STAT_FUN}. It is sufficient to write \code{collapse::collap(data, ~ id, "fmedian")} to get the desired result when the \emph{collapse} namespace is not attached. %If you want to perform optimized computations with \code{collap} without loading the pacckage, load the functions beforehand as well, e.g. \code{fmedian <- collapse::fmedian; data, ~ id, fmedian)}. Alternatively it is of course also possible to use \code{collapse::fmedian(collapse::fgroup_by(data, id))}, or something similar... % \code{collap} by default (\code{keep.by = TRUE, keep.w = TRUE}) preserves all arguments passed to the \code{by} or \code{w} arguments, whether passed in a formula or externally. The names of externally passed vectors and lists are intelligently extracted. So it is possible to write \code{collap(iris, iris$Species)}, and obtain an aggregated data frame with two \code{Species} columns, whereas \code{collap(iris, ~ Species)} only has one \code{Species} column. Similarly for weight vectors passed to \code{w}. In this regard \code{collap} is more sophisticated than other \emph{collapse} functions where preservation of grouping and weight variables is restricted to formula use. For example \code{STD(iris, iris$Species)} does not preserve \code{Species} in the output, whereas \code{STD(iris, ~ Species)} does. This \code{collap} feature is there simply for convenience, for example sometimes a survey is disaggregated into several datasets, and this now allows easy pulling of identifiers or weights from other datasets for aggregations. If all information is available in one dataset, just using formulas is highly recommended. } %% ~Make other sections like Warning with \section{Warning }{\dots.} ~ \seealso{ \code{\link{fsummarise}}, \code{\link{BY}}, \link[=fast-statistical-functions]{Fast Statistical Functions}, \link[=collapse-documentation]{Collapse Overview} } \examples{ ## A Simple Introduction -------------------------------------- head(iris) collap(iris, ~ Species) # Default: FUN = fmean for numeric collapv(iris, 5) # Same using collapv collap(iris, ~ Species, fmedian) # Using the median collap(iris, ~ Species, fmedian, keep.col.order = FALSE) # Groups in-front collap(iris, Sepal.Width + Petal.Width ~ Species, fmedian) # Only '.Width' columns collapv(iris, 5, cols = c(2, 4)) # Same using collapv collap(iris, ~ Species, list(fmean, fmedian)) # Two functions collap(iris, ~ Species, list(fmean, fmedian), return = "long") # Long format collapv(iris, 5, custom = list(fmean = 1:2, fmedian = 3:4)) # Custom aggregation collapv(iris, 5, custom = list(fmean = 1:2, fmedian = 3:4), # Raw output, no column reordering return = "list") collapv(iris, 5, custom = list(fmean = 1:2, fmedian = 3:4), # A strange choice.. return = "long") collap(iris, ~ Species, w = ~ Sepal.Length) # Using Sepal.Length as weights, .. weights <- abs(rnorm(fnrow(iris))) collap(iris, ~ Species, w = weights) # Some random weights.. collap(iris, iris$Species, w = weights) # Note this behavior.. collap(iris, iris$Species, w = weights, keep.by = FALSE, keep.w = FALSE) % \donttest{iris |> fgroup_by(Species) |> collapg()} # dplyr style, but faster ## Multi-Type Aggregation -------------------------------------- head(wlddev) # World Development Panel Data head(collap(wlddev, ~ country + decade)) # Aggregate by country and decade head(collap(wlddev, ~ country + decade, fmedian, ffirst)) # Different functions head(collap(wlddev, ~ country + decade, cols = is.numeric)) # Aggregate only numeric columns head(collap(wlddev, ~ country + decade, cols = 9:13)) # Only the 5 series head(collap(wlddev, PCGDP + LIFEEX ~ country + decade)) # Only GDP and life-expactancy head(collap(wlddev, PCGDP + LIFEEX ~ country + decade, fsum)) # Using the sum instead head(collap(wlddev, PCGDP + LIFEEX ~ country + decade, sum, # Same using base::sum -> slower! na.rm = TRUE)) head(collap(wlddev, wlddev[c("country","decade")], fsum, # Same, exploring different inputs cols = 9:10)) head(collap(wlddev[9:10], wlddev[c("country","decade")], fsum)) head(collapv(wlddev, c("country","decade"), fsum)) # ..names/indices with collapv head(collapv(wlddev, c(1,5), fsum)) g <- GRP(wlddev, ~ country + decade) # Precomputing the grouping head(collap(wlddev, g, keep.by = FALSE)) # This is slightly faster now # Aggregate categorical data using not the mode but the last element head(collap(wlddev, ~ country + decade, fmean, flast)) head(collap(wlddev, ~ country + decade, catFUN = flast, # Aggregate only categorical data cols = is_categorical)) ## Weighted Aggregation ---------------------------------------- # We aggregate to region level using population weights head(collap(wlddev, ~ region + year, w = ~ POP)) # Takes weighted mean for numeric.. # ..and weighted mode for categorical data. The weight vector is aggregated using fsum head(collap(wlddev, ~ region + year, w = ~ POP, # Aggregating weights using sum wFUN = list(fsum, fmax))) # and max (corresponding to mode) ## Multi-Function Aggregation ---------------------------------- head(collap(wlddev, ~ country + decade, list(fmean, fnobs), # Saving mean and Nobs cols = 9:13)) head(collap(wlddev, ~ country + decade, # Same using base R -> slower list(mean = mean, Nobs = function(x, \dots) sum(!is.na(x))), cols = 9:13, na.rm = TRUE)) lapply(collap(wlddev, ~ country + decade, # List output format list(fmean, fnobs), cols = 9:13, return = "list"), head) head(collap(wlddev, ~ country + decade, # Long output format list(fmean, fnobs), cols = 9:13, return = "long")) head(collap(wlddev, ~ country + decade, # Also aggregating categorical data, list(fmean, fnobs), return = "long_dupl")) # and duplicating it 2 times head(collap(wlddev, ~ country + decade, # Now also using 2 functions on list(fmean, fnobs), list(fmode, flast), # categorical data keep.col.order = FALSE)) head(collap(wlddev, ~ country + decade, # More functions, string input, c("fmean","fsum","fnobs","fsd","fvar"), # parallelized execution c("fmode","ffirst","flast","fndistinct"), # (choose more than 1 cores, parallel = TRUE, mc.cores = 1L, # depending on your machine) keep.col.order = FALSE)) ## Custom Aggregation ------------------------------------------ head(collap(wlddev, ~ country + decade, # Custom aggregation custom = list(fmean = 9:13, fsd = 9:10, fmode = 7:8))) head(collap(wlddev, ~ country + decade, # Using column names custom = list(fmean = "PCGDP", fsd = c("LIFEEX","GINI"), flast = "date"))) head(collap(wlddev, ~ country + decade, # Weighted parallelized custom custom = list(fmean = 9:12, fsd = 9:10, # aggregation fmode = 7:8), w = ~ POP, wFUN = list(fsum, fmax), parallel = TRUE, mc.cores = 1L)) head(collap(wlddev, ~ country + decade, # No column reordering custom = list(fmean = 9:12, fsd = 9:10, fmode = 7:8), w = ~ POP, wFUN = list(fsum, fmax), parallel = TRUE, mc.cores = 1L, keep.col.order = FALSE)) % \donttest{ ## Piped Use -------------------------------------------------- library(magrittr) # Note: Used because |> is not available on older R versions iris \%>\% fgroup_by(Species) \%>\% collapg() wlddev \%>\% fgroup_by(country, decade) \%>\% collapg() \%>\% head() wlddev \%>\% fgroup_by(region, year) \%>\% collapg(w = POP) \%>\% head() wlddev \%>\% fgroup_by(country, decade) \%>\% collapg(fmedian, flast) \%>\% head() wlddev \%>\% fgroup_by(country, decade) \%>\% collapg(custom = list(fmean = 9:12, fmode = 5:7, flast = 3)) \%>\% head() % } } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{manip} % __ONLY ONE__ keyword per line collapse/man/collapse-depreciated.Rd0000644000176200001440000000725714167161167017214 0ustar liggesusers\name{collapse-depreciated} \alias{collapse-depreciated} \alias{Recode} \alias{replace_non_finite} \alias{is.regular} \title{ Depreciated \emph{collapse} Functions } \description{ The functions \code{Recode} and \code{replace_non_finite} available until \emph{collapse} v1.1.0 will be removed soon. Since v1.2.0, \code{Recode} is replaced by \code{\link{recode_num}} and \code{\link{recode_char}} and \code{replace_non_finite} is replaced by \code{\link{replace_Inf}}. Since version 1.5.1, \code{is.regular} is depreciated - the function is not very useful and clashes with a more important one in the \emph{zoo} package. %The function \code{as.factor.GRP} was renamed to \code{\link{as_factor_GRP}} to make it clear that this is not a method, and \code{group_names.GRP} was renamed to \code{\link{GRPnames}} for the same reason and to increase parsimony. } \usage{ Recode(X, \dots, copy = FALSE, reserve.na.nan = TRUE, regex = FALSE) replace_non_finite(X, value = NA, replace.nan = TRUE) is.regular(x) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{X}{a vector, matrix or data frame.} \item{x}{an R object. } \item{\dots}{comma-separated recode arguments of the form: \code{name = newname, `2` = 0, `NaN` = 0, `NA` = 0, `Inf` = NA, `-Inf` = NA}, etc\dots} \item{value}{a single (scalar) value to replace matching elements with. Default is \code{NA}.} \item{copy}{logical. For reciprocal or sequential replacements of the form \code{a = b, b = c} make a copy of \code{X} to prevent \code{a} being replaced with \code{b} and then all \code{b}-values being replaced with \code{c} again. In general \code{Recode} does the replacements one-after the other, starting with the first. } \item{reserve.na.nan}{logical. \code{TRUE} identifies \code{NA} and \code{NaN} as special numeric values and does the correct replacement. \code{FALSE} will treat \code{NA/NaN} as strings, and thus not match numeric \code{NA/NaN}. \emph{Note}: This is not an issue for \code{Inf/-Inf}, which are matched in both numeric and character variables. } \item{regex}{logical. If \code{TRUE}, all recode-argument names are (sequentially) passed to \code{\link{grepl}} as a pattern to search \code{X}. All matches are replaced.} \item{replace.nan}{logical. \code{TRUE} (default) replaces \code{NaN/Inf/-Inf}. \code{FALSE} replaces only \code{Inf/-Inf}.} } % \details{ %% ~~ If necessary, more details than the description above ~~ % } % \value{ %% ~Describe the value returned %% If it is a LIST, use %% \item{comp1 }{Description of 'comp1'} %% \item{comp2 }{Description of 'comp2'} %% \dots % } % \references{ %% ~put references to the literature/web site here ~ % } % \author{ %% ~~who you are~~ % } \note{ \code{Recode} is not suitable for recoding factors or other classed objects / columns, it simply does \code{X[X == value] <- replacement} in a more efficient way. For classed objects, see for example \code{dplyr::recode}. } %% ~Make other sections like Warning with \section{Warning }{\dots.} ~ \seealso{ \link[=recode-replace]{Recode and Replace Values}, \link[=collapse-documentation]{Collapse Overview} } \examples{ \dontrun{ Recode(c("a","b","c"), a = "b", b = "c") Recode(c("a","b","c"), a = "b", b = "c", copy = TRUE) Recode(c("a","b","c"), a = "b", b = "a", copy = TRUE) Recode(month.name, ber = NA, regex = TRUE) mtcr <- Recode(mtcars, `0` = 2, `4` = Inf, `1` = NaN) replace_non_finite(mtcr) replace_non_finite(mtcr, replace.nan = FALSE) } } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{manip} % __ONLY ONE__ keyword per line % use one of RShowDoc("KEYWORDS") \keyword{documentation} collapse/man/qF.Rd0000644000176200001440000002006714170153716013476 0ustar liggesusers\name{qF-qG-finteraction} \alias{qF} \alias{qG} \alias{is_qG} \alias{as_factor_qG} \alias{finteraction} \title{ Fast Factor Generation, Interactions and Vector Grouping } \description{ \code{qF}, shorthand for 'quick-factor' implements very fast factor generation from atomic vectors using either radix ordering or index hashing. \code{qG}, shorthand for 'quick-group', generates a kind of factor-light without the levels attribute but instead an attribute providing the number of levels. Optionally the levels / groups can be attached, but without converting them to character. Objects have a class 'qG'. A multivariate version is provided by the function \code{\link{group}}. \code{finteraction} generates a factor by interacting multiple vectors or factors. In that process missing values are always replaced with a level and unused levels are always dropped. } \usage{ qF(x, ordered = FALSE, na.exclude = TRUE, sort = TRUE, drop = FALSE, keep.attr = TRUE, method = "auto") qG(x, ordered = FALSE, na.exclude = TRUE, sort = TRUE, return.groups = FALSE, method = "auto") is_qG(x) as_factor_qG(x, ordered = FALSE, na.exclude = TRUE) finteraction(\dots, ordered = FALSE, sort = TRUE, method = "auto") } \arguments{ \item{x}{a atomic vector, factor or quick-group.} \item{ordered}{logical. Adds a class 'ordered'.} \item{na.exclude}{logical. \code{TRUE} preserves missing values (i.e. no level is generated for \code{NA}).} \item{sort}{logical. \code{TRUE} sorts the levels in ascending order (like \code{\link{factor}}); \code{FALSE} provides the levels in order of first appearance, which can be significantly faster. Note that if a factor is passed, only \code{sort = FALSE} takes effect (as factors usually have sorted levels and checking sortedness can be expensive). } \item{drop}{logical. If \code{x} is a factor, \code{TRUE} efficiently drops unused factor levels beforehand using \code{\link{fdroplevels}}.} \item{keep.attr}{logical. If \code{TRUE} and \code{x} has additional attributes apart from 'levels' and 'class', these are preserved in the conversion to factor.} \item{method}{an integer or character string specifying the method of computation: \tabular{lllll}{\emph{ Int. } \tab\tab \emph{ String } \tab\tab \emph{ Description } \cr 1 \tab\tab "auto" \tab\tab automatic selection: hash for character, logical, if \code{sort = FALSE} or if \code{length(x) < 500}, else radix. \cr 2 \tab\tab "radix" \tab\tab use radix ordering to generate factors. Supports \code{sort = FALSE} only for character vectors. See Details. \cr 3 \tab\tab "hash" \tab\tab use index hashing to generate factors. See Details. \cr } Note that for \code{finteraction}, \code{method = "hash"} is always unsorted. } \item{return.groups}{logical. \code{TRUE} returns the unique elements / groups / levels of \code{x} in an attribute called 'groups'. Unlike \code{qF}, they are not converted to character.} \item{\dots}{multiple atomic vectors or factors, or a single list of equal-length vectors or factors. See Details. } } \details{ These functions are quite important. Whenever a vector is passed to a \emph{collapse} function such as \code{fmean(mtcars, mtcars$cyl)}, is is grouped using \code{qF} or \code{qG}. \code{qF} is a combination of \code{as.factor} and \code{factor}. Applying it to a vector i.e. \code{qF(x)} gives the same result as \code{as.factor(x)}. \code{qF(x, ordered = TRUE)} generates an ordered factor (same as \code{factor(x, ordered = TRUE)}), and \code{qF(x, na.exclude = FALSE)} generates a level for missing values (same as \code{factor(x, exclude = NULL)}). An important addition is that \code{qF(x, na.exclude = FALSE)} also adds a class 'na.included'. This prevents \emph{collapse} functions from checking missing values in the factor, and is thus computationally more efficient. Therefore factors used in grouped operations should preferably be generated using \code{qF(x, na.exclude = FALSE)}. Setting \code{sort = FALSE} gathers the levels in first-appearance order (unless \code{method = "radix"} and \code{x} is numeric, in which case the levels are always sorted). This can provide a speed improvement, particularly for character data. % for non-numeric \code{x}. There are 3 methods of computation: radix ordering, index hashing, and hashing based on \code{\link{group}}. Radix ordering is done through combining the functions \code{\link{radixorder}} and \code{\link{groupid}}. It is generally faster than index hashing for large numeric data (although there are exceptions). Index hashing is done using \code{Rcpp::sugar::sort_unique} and \code{Rcpp::sugar::match}. It is generally faster for character data. If \code{sort = FALSE}, \code{\link{group}} is used which is also very fast. % The hashing methods have very fast For logical data, a super fast one-pass method was written which is subsumed in the hash method. Regarding speed: In general \code{qF} is around 5x faster than \code{as.factor} on character data and about 30x faster on numeric data. Automatic method dispatch typically does a good job delivering optimal performance. \code{qG} is in the first place a programmers function. It generates a factor-'light' consisting of only an integer grouping vector and an attribute providing the number of groups. It is slightly faster and more memory efficient than \code{\link{GRP}} for grouping atomic vectors, which is the main reason it exists. The fact that it (optionally) returns the unique groups / levels without converting them to character is an added bonus (this also provides a small performance gain compared to \code{qF}). Since v1.7, you can also call a C-level function \code{\link{group}} directly, which works for multivariate data as well, but does not sort the data and does not preserve missing values. \code{finteraction} is simply a wrapper around \code{as_factor_GRP(GRP.default(X))}, where X is replaced by the arguments in '\dots' combined in a list (so it's not really an interaction function but just a multivariate grouping converted to factor, see \code{\link{GRP}} for computational details). In general: All vectors, factors, or lists of vectors / factors passed can be interacted. Interactions always create a level for missing values and always drop any unused levels. } \value{ \code{qF} and \code{finteraction} return an (ordered) factor. \code{qG} returns an object of class 'qG': an integer grouping vector with an attribute 'N.groups' indicating the number of groups, and, if \code{return.groups = TRUE}, an attribute 'groups' containing the vector of unique groups / elements in \code{x} corresponding to the integer-id. } \note{ Neither \code{qF} nor \code{qG} reorder groups / factor levels. An exception was added in v1.7, when calling \code{qF(f, sort = FALSE)} on a factor \code{f}, the levels are recast in first appearance order. These objects can however be converted into one another using \code{qF/qG} or the direct method \code{as_factor_qG}, and it is also possible to add a class 'ordered' (\code{ordered = TRUE}) and to create am extra level / integer for missing values (\code{na.exclude = FALSE}). % Apart from that \code{qF} and \code{qG} don't do much to each others objects. } \seealso{ \code{\link{group}}, \code{\link{groupid}}, \code{\link{GRP}}, \link[=fast-grouping-ordering]{Fast Grouping and Ordering}, \link[=collapse-documentation]{Collapse Overview} } \examples{ cylF <- qF(mtcars$cyl) # Factor from atomic vector cylG <- qG(mtcars$cyl) # Quick-group from atomic vector cylG # See the simple structure of this object cf <- qF(wlddev$country) # Bigger data cf2 <- qF(wlddev$country, na.exclude = FALSE) # With na.included class dat <- num_vars(wlddev) \donttest{ % No code relying on suggested package # cf2 is faster in grouped operations because no missing value check is performed library(microbenchmark) microbenchmark(fmax(dat, cf), fmax(dat, cf2)) } finteraction(mtcars$cyl, mtcars$vs) # Interacting two variables (can be factors) head(finteraction(mtcars)) # A more crude example.. } \keyword{manip} collapse/man/fdroplevels.Rd0000644000176200001440000000442614167156135015462 0ustar liggesusers\name{fdroplevels} \alias{fdroplevels} \alias{fdroplevels.factor} \alias{fdroplevels.data.frame} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Fast Removal of Unused Factor Levels } \description{ A substantially faster replacement for \code{\link{droplevels}}. } \usage{ fdroplevels(x, ...) \method{fdroplevels}{factor}(x, ...) \method{fdroplevels}{data.frame}(x, ...) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{x}{a factor, or data frame / list containing one or more factors.} \item{\dots}{not used.} } \details{ \code{\link{droplevels}} passes a factor from which levels are to be dropped to \code{\link{factor}}, which first calls \code{\link{unique}} and then \code{\link{match}} to drop unused levels. Both functions internally use a hash table, which is highly inefficient. \code{fdroplevels} does not require mapping values at all, but uses a super fast boolean vector method to determine which levels are unused and remove those levels. In addition, if no unused levels are found, \code{x} is simply returned. Any missing values found in \code{x} are efficiently skipped in the process of checking and replacing levels. All other attributes of \code{x} are preserved. } \value{ \code{x} will any unused factor levels removed. } \note{ If \code{x} is malformed i.e. has too few levels, this function can cause a segmentation fault, thus only use with ordinary / proper factors. } %% ~Make other sections like Warning with \section{Warning }{....} ~ \seealso{ \code{\link{qF}}, \code{\link{funique}}, \link[=fast-grouping-ordering]{Fast Grouping and Ordering}, \link[=collapse-documentation]{Collapse Overview} } \examples{ f <- iris$Species[1:100] fdroplevels(f) identical(fdroplevels(f), droplevels(f)) fNA <- na_insert(f) fdroplevels(fNA) identical(fdroplevels(fNA), droplevels(fNA)) identical(fdroplevels(ss(iris, 1:100)), droplevels(ss(iris, 1:100))) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory (show via RShowDoc("KEYWORDS")): % \keyword{ ~kwd1 } % \keyword{ ~kwd2 } % Use only one keyword per line. % For non-standard keywords, use \concept instead of \keyword: % \concept{ ~cpt1 } % \concept{ ~cpt2 } % Use only one concept per line. collapse/man/wlddev.Rd0000644000176200001440000000613414066070071014410 0ustar liggesusers\name{wlddev} \alias{wlddev} \docType{data} \title{ World Development Dataset } \description{ This dataset contains 5 indicators from the World Bank's World Development Indicators (WDI) database: (1) GDP per capita, (2) Life expectancy at birth, (3) GINI index, (4) Net ODA and official aid received and (5) Population. The panel data is balanced and covers 216 present and historic countries from 1960-2020 (World Bank aggregates and regional entities are excluded). Apart from the indicators the data contains a number of identifiers (character country name, factor ISO3 country code, World Bank region and income level, numeric year and decade) and 2 generated variables: A logical variable indicating whether the country is an OECD member, and a fictitious variable stating the date the data was recorded. These variables were added so that all common data-types are represented in this dataset, making it an ideal test-dataset for certain \emph{collapse} functions. } \usage{data("wlddev")} \format{ A data frame with 13176 observations on the following 13 variables. All variables are labeled e.g. have a 'label' attribute. \describe{ \item{\code{country}}{\emph{chr} Country Name} \item{\code{iso3c}}{\emph{fct} Country Code} \item{\code{date}}{\emph{date} Date Recorded (Fictitious)} \item{\code{year}}{\emph{int} Year} \item{\code{decade}}{\emph{int} Decade} \item{\code{region}}{\emph{fct} World Bank Region} \item{\code{income}}{\emph{fct} World Bank Income Level} \item{\code{OECD}}{\emph{log} Is OECD Member Country?} \item{\code{PCGDP}}{\emph{num} GDP per capita (constant 2010 US$)} \item{\code{LIFEEX}}{\emph{num} Life expectancy at birth, total (years)} \item{\code{GINI}}{\emph{num} GINI index (World Bank estimate)} \item{\code{ODA}}{\emph{num} Net official development assistance and official aid received (constant 2018 US$)} \item{\code{POP}}{\emph{num} Population, total} } } % \details{ %% ~~ If necessary, more details than the __description__ above ~~ % } \source{ \url{https://data.worldbank.org/}, accessed via the \code{WDI} package. The codes for the series are \code{c("NY.GDP.PCAP.KD", "SP.DYN.LE00.IN", "SI.POV.GINI", "DT.ODA.ALLD.KD", "SP.POP.TOTL")}. } % \references{ %% ~~ possibly secondary sources and usages ~~ % } \seealso{ \code{\link{GGDC10S}}, \link[=collapse-documentation]{Collapse Overview} } \examples{ data(wlddev) # Panel-summarizing the 5 series qsu(wlddev, pid = ~iso3c, cols = 9:13, vlabels = TRUE) # By Region qsu(wlddev, by = ~region, cols = 9:13, vlabels = TRUE) # Panel-summary by region qsu(wlddev, by = ~region, pid = ~iso3c, cols = 9:13, vlabels = TRUE) # Pairwise correlations: Ovarall print(pwcor(get_vars(wlddev, 9:13), N = TRUE, P = TRUE), show = "lower.tri") # Pairwise correlations: Between Countries print(pwcor(fmean(get_vars(wlddev, 9:13), wlddev$iso3c), N = TRUE, P = TRUE), show = "lower.tri") # Pairwise correlations: Within Countries print(pwcor(fwithin(get_vars(wlddev, 9:13), wlddev$iso3c), N = TRUE, P = TRUE), show = "lower.tri") } \keyword{datasets} collapse/man/pwcor_pwcov_pwnobs.Rd0000644000176200001440000001014514176642305017067 0ustar liggesusers\name{pwcor-pwcov-pwnobs} \alias{pwcor} \alias{pwcov} \alias{pwnobs} \alias{print.pwcov} \alias{print.pwcor} %- Also NEED an '\alias' for EACH other topic documented here. \title{ (Pairwise, Weighted) Correlations, Covariances and Observation Counts } \description{ Computes (pairwise, weighted) Pearsons correlations, covariances and observation counts. Pairwise correlations and covariances can be computed together with observation counts and p-values, and output as 3D array (default) or list of matrices. \code{pwcor} and \code{pwcov} offer an elaborate print method. } \usage{ pwcor(X, \dots, w = NULL, N = FALSE, P = FALSE, array = TRUE, use = "pairwise.complete.obs") pwcov(X, \dots, w = NULL, N = FALSE, P = FALSE, array = TRUE, use = "pairwise.complete.obs") pwnobs(X) \method{print}{pwcor}(x, digits = 2L, sig.level = 0.05, show = c("all","lower.tri","upper.tri"), spacing = 1L, return = FALSE, \dots) \method{print}{pwcov}(x, digits = 2L, sig.level = 0.05, show = c("all","lower.tri","upper.tri"), spacing = 1L, return = FALSE, \dots) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{X}{a matrix or data.frame, for \code{pwcor} and \code{pwcov} all columns must be numeric. All functions are faster on matrices, so converting is advised for large data (see \code{\link{qM}}).} \item{x}{an object of class 'pwcor' / 'pwcov'. } \item{w}{numeric. A vector of (frequency) weights. } \item{N}{logical. \code{TRUE} also computes pairwise observation counts.} \item{P}{logical. \code{TRUE} also computes pairwise p-values (same as \code{\link{cor.test}} and \code{Hmisc::rcorr}).} \item{array}{logical. If \code{N = TRUE} or \code{P = TRUE}, \code{TRUE} (default) returns output as 3D array whereas \code{FALSE} returns a list of matrices.} \item{use}{argument passed to \code{\link{cor}} / \code{\link{cov}}. If \code{use != "pairwise.complete.obs"}, \code{sum(complete.cases(X))} is used for \code{N}, and p-values are computed accordingly. } \item{digits}{integer. The number of digits to round to in print. } \item{sig.level}{numeric. P-value threshold below which a \code{'*'} is displayed above significant coefficients if \code{P = TRUE}. } \item{show}{character. The part of the correlation / covariance matrix to display. } \item{spacing}{integer. Controls the spacing between different reported quantities in the printout of the matrix: 0 - compressed, 1 - single space, 2 - double space.} \item{return}{logical. \code{TRUE} returns the formatted object from the print method for exporting. The default is to return \code{x} invisibly.} \item{\dots}{other arguments passed to \code{\link{cor}} or \code{\link{cov}}. Only sensible if \code{P = FALSE}. } } \value{ a numeric matrix, 3D array or list of matrices with the computed statistics. For \code{pwcor} and \code{pwcov} the object has a class 'pwcor' and 'pwcov', respectively. } \note{ \code{weights::wtd.cors} is imported for weighted pairwise correlations (written in C for speed). For weighted correlations with bootstrap SE's see \code{weights::wtd.cor} (but bootstrap can be slow). Weighted correlations for complex surveys are implemented in \code{jtools::svycor}. An equivalent and faster implementation of \code{pwcor} (without weights) is provided in \code{Hmisc::rcorr} (written in Fortran). } %% ~Make other sections like Warning with \section{Warning }{\dots.} ~ \seealso{ \code{\link{qsu}}, \link[=summary-statistics]{Summary Statistics}, \link[=collapse-documentation]{Collapse Overview} %% ~~objects to See Also as } \examples{ mna <- na_insert(mtcars) pwcor(mna) pwcov(mna) pwnobs(mna) pwcor(mna, N = TRUE) pwcor(mna, P = TRUE) pwcor(mna, N = TRUE, P = TRUE) aperm(pwcor(mna, N = TRUE, P = TRUE)) print(pwcor(mna, N = TRUE, P = TRUE), digits = 3, sig.level = 0.01, show = "lower.tri") pwcor(mna, N = TRUE, P = TRUE, array = FALSE) print(pwcor(mna, N = TRUE, P = TRUE, array = FALSE), show = "lower.tri") } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{multivariate} % use one of RShowDoc("KEYWORDS") collapse/man/flm.Rd0000644000176200001440000001170514167156464013716 0ustar liggesusers\name{flm} \alias{flm} \title{ Fast (Weighted) Linear Model Fitting % Sparse, } \description{ \code{flm} is a fast linear model command that takes matrices as input and (by default) only returns a coefficient matrix. 6 different efficient fitting methods are implemented: 4 using base R linear algebra, and 2 utilizing the \emph{RcppArmadillo} and \emph{RcppEigen} packages. The function itself only has an overhead of 5-10 microseconds, and is thus well suited as a bootstrap workhorse. } \usage{ flm(y, X, w = NULL, add.icpt = FALSE, return.raw = FALSE, % sparse = FALSE method = c("lm", "solve", "qr", "arma", "chol", "eigen"), eigen.method = 3L, ...) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{y}{a response vector or matrix. Multiple dependent variables are only supported by methods "lm", "solve", "qr" and "chol".} \item{X}{a matrix of regressors.} \item{w}{a weight vector.} \item{add.icpt}{logical. \code{TRUE} adds an intercept column named '(Intercept)' to \code{X}.} % \item{sparse}{logical. \code{TRUE} coerces \code{X} to a sparse matrix using \code{as(X, "dgCMatrix")}.} \item{return.raw}{logical. \code{TRUE} returns the original output from the different methods. For 'lm', 'arma' and 'eigen', this includes additional statistics such as residuals, fitted values or standard errors. The other methods just return coefficients but in different formats. } \item{method}{an integer or character string specifying the method of computation: \tabular{lllll}{\emph{ Int. } \tab\tab \emph{ String } \tab\tab \emph{ Description } \cr 1 \tab\tab "lm" \tab\tab uses \code{\link{.lm.fit}}. \cr 2 \tab\tab "solve" \tab\tab \code{solve(crossprod(X), crossprod(X, y))}. \cr 3 \tab\tab "qr" \tab\tab \code{qr.coef(qr(X), y)}. \cr 4 \tab\tab "arma" \tab\tab uses \code{RcppArmadillo::fastLmPure}. \cr 5 \tab\tab "chol" \tab\tab \code{chol2inv(chol(crossprod(X))) \%*\% crossprod(X, y)} (quite fast but requires \code{crossprod(X)} to be positive definite). \cr 6 \tab\tab "eigen" \tab\tab uses \code{RcppEigen::fastLmPure} (very fast but potentially unstable, depending on the method). \cr } } \item{eigen.method}{integer. Select the method of computation used by \code{RcppEigen::fastLmPure}: \tabular{lll}{\emph{ Int. } \tab\tab \emph{ Description } \cr 0 \tab\tab column-pivoted QR decomposition. \cr 1 \tab\tab unpivoted QR decomposition. \cr 2 \tab\tab LLT Cholesky. \cr 3 \tab\tab LDLT Cholesky. \cr 4 \tab\tab Jacobi singular value decomposition (SVD). \cr 5 \tab\tab method based on the eigenvalue-eigenvector decomposition of X'X. \cr } See \code{vignette("RcppEigen-Introduction", package = "RcppEigen")} for details on these methods and benchmark results. Run \code{source(system.file("examples", "lmBenchmark.R", package = "RcppEigen"))} to re-run the benchmark on your machine. } \item{...}{further arguments passed to other methods. Sensible choices are \code{tol = value} - a numerical tolerance for the solution - applicable with methods "lm", "solve" and "qr" (default is \code{1e-7}), or \code{LAPACK = TRUE} with method "qr" to use LAPACK routines to for the qr decomposition (typically faster than LINPACK (the default)).} } % \details{ %% ~~ If necessary, more details than the description above ~~ % } \value{ If \code{return.raw = FALSE}, a matrix of coefficients with the rows corresponding to the columns of \code{X}, otherwise the raw results from the various methods are returned. } % \references{ %% ~put references to the literature/web site here ~ % } % \author{ %% ~~who you are~~ % } \note{ Method "qr" supports sparse matrices, so for an \code{X} matrix with many dummy variables consider method "qr" passing \code{as(X, "dgCMatrix")} instead of just \code{X}. } %% ~Make other sections like Warning with \section{Warning }{....} ~ \seealso{ \code{\link[=HDW]{fhdwithin/HDW}}, \code{\link{fFtest}}, \link[=data-transformations]{Data Transformations}, \link[=collapse-documentation]{Collapse Overview} } \examples{ coef <- flm(mtcars$mpg, qM(mtcars[c("hp","carb")]), mtcars$wt, add.icpt = TRUE) coef lmcoef <- coef(lm(mpg ~ hp + carb, weights = wt, mtcars)) lmcoef all.equal(drop(coef), lmcoef) \donttest{ % Need RcppArmadillo and RcppEigen all_obj_equal(lapply(1:6, function(i) flm(mtcars$mpg, qM(mtcars[c("hp","carb")]), mtcars$wt, add.icpt = TRUE, method = i))) } } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory (show via RShowDoc("KEYWORDS")): % \keyword{ ~kwd1 } % \keyword{ ~kwd2 } % Use only one keyword per line. % For non-standard keywords, use \concept instead of \keyword: % \concept{ ~cpt1 } % \concept{ ~cpt2 } % Use only one concept per line. collapse/man/fbetween_fwithin.Rd0000644000176200001440000003423314167156464016470 0ustar liggesusers\name{fbetween-fwithin} \alias{B} \alias{B.default} \alias{B.matrix} \alias{B.data.frame} \alias{B.pseries} \alias{B.pdata.frame} \alias{B.grouped_df} \alias{W} \alias{W.default} \alias{W.matrix} \alias{W.data.frame} \alias{W.pseries} \alias{W.pdata.frame} \alias{W.grouped_df} \alias{fbetween} \alias{fbetween.default} \alias{fbetween.matrix} \alias{fbetween.data.frame} \alias{fbetween.pseries} \alias{fbetween.pdata.frame} \alias{fbetween.grouped_df} \alias{fwithin} \alias{fwithin.default} \alias{fwithin.matrix} \alias{fwithin.data.frame} \alias{fwithin.pseries} \alias{fwithin.pdata.frame} \alias{fwithin.grouped_df} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Fast Between (Averaging) and (Quasi-)Within (Centering) Transformations } \description{ \code{fbetween} and \code{fwithin} are S3 generics to efficiently obtain between-transformed (averaged) or (quasi-)within-transformed (demeaned) data. These operations can be performed groupwise and/or weighted. \code{B} and \code{W} are wrappers around \code{fbetween} and \code{fwithin} representing the 'between-operator' and the 'within-operator'. (\code{B} / \code{W} provide more flexibility than \code{fbetween} / \code{fwithin} when applied to data frames (i.e. column subsetting, formula input, auto-renaming and id-variable-preservation capabilities\dots), but are otherwise identical.) %(\code{fbetween} and \code{fwithin} are simple programmers functions in style of the \link[=fast-statistical-functions]{Fast Statistical Functions} while \code{B} and \code{W} are more practical to use in regression formulas or for ad-hoc computations on data frames.) } \usage{ fbetween(x, \dots) fwithin(x, \dots) B(x, \dots) W(x, \dots) \method{fbetween}{default}(x, g = NULL, w = NULL, na.rm = TRUE, fill = FALSE, \dots) \method{fwithin}{default}(x, g = NULL, w = NULL, na.rm = TRUE, mean = 0, theta = 1, \dots) \method{B}{default}(x, g = NULL, w = NULL, na.rm = TRUE, fill = FALSE, \dots) \method{W}{default}(x, g = NULL, w = NULL, na.rm = TRUE, mean = 0, theta = 1, \dots) \method{fbetween}{matrix}(x, g = NULL, w = NULL, na.rm = TRUE, fill = FALSE, \dots) \method{fwithin}{matrix}(x, g = NULL, w = NULL, na.rm = TRUE, mean = 0, theta = 1, \dots) \method{B}{matrix}(x, g = NULL, w = NULL, na.rm = TRUE, fill = FALSE, stub = "B.", \dots) \method{W}{matrix}(x, g = NULL, w = NULL, na.rm = TRUE, mean = 0, theta = 1, stub = "W.", \dots) \method{fbetween}{data.frame}(x, g = NULL, w = NULL, na.rm = TRUE, fill = FALSE, \dots) \method{fwithin}{data.frame}(x, g = NULL, w = NULL, na.rm = TRUE, mean = 0, theta = 1, \dots) \method{B}{data.frame}(x, by = NULL, w = NULL, cols = is.numeric, na.rm = TRUE, fill = FALSE, stub = "B.", keep.by = TRUE, keep.w = TRUE, \dots) \method{W}{data.frame}(x, by = NULL, w = NULL, cols = is.numeric, na.rm = TRUE, mean = 0, theta = 1, stub = "W.", keep.by = TRUE, keep.w = TRUE, \dots) # Methods for compatibility with plm: \method{fbetween}{pseries}(x, effect = 1L, w = NULL, na.rm = TRUE, fill = FALSE, \dots) \method{fwithin}{pseries}(x, effect = 1L, w = NULL, na.rm = TRUE, mean = 0, theta = 1, \dots) \method{B}{pseries}(x, effect = 1L, w = NULL, na.rm = TRUE, fill = FALSE, \dots) \method{W}{pseries}(x, effect = 1L, w = NULL, na.rm = TRUE, mean = 0, theta = 1, \dots) \method{fbetween}{pdata.frame}(x, effect = 1L, w = NULL, na.rm = TRUE, fill = FALSE, \dots) \method{fwithin}{pdata.frame}(x, effect = 1L, w = NULL, na.rm = TRUE, mean = 0, theta = 1, \dots) \method{B}{pdata.frame}(x, effect = 1L, w = NULL, cols = is.numeric, na.rm = TRUE, fill = FALSE, stub = "B.", keep.ids = TRUE, keep.w = TRUE, \dots) \method{W}{pdata.frame}(x, effect = 1L, w = NULL, cols = is.numeric, na.rm = TRUE, mean = 0, theta = 1, stub = "W.", keep.ids = TRUE, keep.w = TRUE, \dots) # Methods for grouped data frame / compatibility with dplyr: \method{fbetween}{grouped_df}(x, w = NULL, na.rm = TRUE, fill = FALSE, keep.group_vars = TRUE, keep.w = TRUE, \dots) \method{fwithin}{grouped_df}(x, w = NULL, na.rm = TRUE, mean = 0, theta = 1, keep.group_vars = TRUE, keep.w = TRUE, \dots) \method{B}{grouped_df}(x, w = NULL, na.rm = TRUE, fill = FALSE, stub = "B.", keep.group_vars = TRUE, keep.w = TRUE, \dots) \method{W}{grouped_df}(x, w = NULL, na.rm = TRUE, mean = 0, theta = 1, stub = "W.", keep.group_vars = TRUE, keep.w = TRUE, \dots) } \arguments{ \item{x}{a numeric vector, matrix, data frame, panel series (class \code{pseries} of package \code{plm}), panel data frame (\code{plm::pdata.frame}) or grouped data frame (class 'grouped_df').} \item{g}{a factor, \code{\link{GRP}} object, atomic vector (internally converted to factor) or a list of vectors / factors (internally converted to a \code{\link{GRP}} object) used to group \code{x}.} \item{by}{\emph{B and W data.frame method}: Same as g, but also allows one- or two-sided formulas i.e. \code{~ group1} or \code{var1 + var2 ~ group1 + group2}. See Examples.} \item{w}{a numeric vector of (non-negative) weights. \code{B}/\code{W} data frame and \code{pdata.frame} methods also allow a one-sided formula i.e. \code{~ weightcol}. The \code{grouped_df} (\emph{dplyr}) method supports lazy-evaluation. See Examples.} \item{cols}{\emph{data.frame method}: Select columns to center/average using a function, column names, indices or a logical vector. Default: All numeric variables. \emph{Note}: \code{cols} is ignored if a two-sided formula is passed to \code{by}.} \item{na.rm}{logical. Skip missing values in \code{x} and \code{w} when computing averages. If \code{na.rm = FALSE} and a \code{NA} or \code{NaN} is encountered, the average for that group will be \code{NA}, and all data points belonging to that group in the output vector will also be \code{NA}.} \item{effect}{\emph{plm} methods: Select which panel identifier should be used as grouping variable. 1L takes the first variable in the \code{plm::index}, 2L the second etc. Index variables can also be called by name using a character string. If more than one variable is supplied, the corresponding index-factors are interacted. } \item{stub}{a prefix or stub to rename all transformed columns. \code{FALSE} will not rename columns.} \item{fill}{\emph{option to \code{fbetween}/\code{B}}: Logical. \code{TRUE} will overwrite missing values in \code{x} with the respective average. By default missing values in \code{x} are preserved.} \item{mean}{\emph{option to \code{fwithin}/\code{W}}: The mean to center on, default is 0, but a different mean can be supplied and will be added to the data after the centering is performed. A special option when performing grouped centering is \code{mean = "overall.mean"}. In that case the overall mean of the data will be added after subtracting out group means.} \item{theta}{\emph{option to \code{fwithin}/\code{W}}: Double. An optional scalar parameter for quasi-demeaning i.e. \code{x - theta * xi.}. This is useful for variance components ('random-effects') estimators. see Details.} \item{keep.by, keep.ids, keep.group_vars}{\emph{B and W data.frame, pdata.frame and grouped_df methods}: Logical. Retain grouping / panel-identifier columns in the output. For data frames this only works if grouping variables were passed in a formula.} \item{keep.w}{\emph{B and W data.frame, pdata.frame and grouped_df methods}: Logical. Retain column containing the weights in the output. Only works if \code{w} is passed as formula / lazy-expression.} \item{\dots}{arguments to be passed to or from other methods.} } \details{ Without groups, \code{fbetween}/\code{B} replaces all data points in \code{x} with their mean or weighted mean (if \code{w} is supplied). Similarly \code{fwithin/W} subtracts the (weighted) mean from all data points i.e. centers the data on the mean. \cr With groups supplied to \code{g}, the replacement / centering performed by \code{fbetween/B} | \code{fwithin/W} becomes groupwise. In terms of panel data notation: If \code{x} is a vector in such a panel dataset, \code{xit} denotes a single data-point belonging to group \code{i} in time-period \code{t} (\code{t} need not be a time-period). Then \code{xi.} denotes \code{x}, averaged over \code{t}. \code{fbetween}/\code{B} now returns \code{xi.} and \code{fwithin}/\code{W} returns \code{x - xi.}. Thus for any data \code{x} and any grouping vector \code{g}: \code{B(x,g) + W(x,g) = xi. + x - xi. = x}. In terms of variance, \code{fbetween/B} only retains the variance between group averages, while \code{fwithin}/\code{W}, by subtracting out group means, only retains the variance within those groups. \cr The data replacement performed by \code{fbetween}/\code{B} can keep (default) or overwrite missing values (option \code{fill = TRUE}) in \code{x}. \code{fwithin/W} can center data simply (default), or add back a mean after centering (option \code{mean = value}), or add the overall mean in groupwise computations (option \code{mean = "overall.mean"}). Let \code{x..} denote the overall mean of \code{x}, then \code{fwithin}/\code{W} with \code{mean = "overall.mean"} returns \code{x - xi. + x..} instead of \code{x - xi.}. This is useful to get rid of group-differences but preserve the overall level of the data. In regression analysis, centering with \code{mean = "overall.mean"} will only change the constant term. See Examples. If \code{theta != 1}, \code{fwithin}/\code{W} performs quasi-demeaning \code{x - theta * xi.}. If \code{mean = "overall.mean"}, \code{x - theta * xi. + theta * x..} is returned, so that the mean of the partially demeaned data is still equal to the overall data mean \code{x..}. A numeric value passed to \code{mean} will simply be added back to the quasi-demeaned data i.e. \code{x - theta * xi. + mean}. Now in the case of a linear panel model \eqn{y_{it} = \beta_0 + \beta_1 X_{it} + u_{it}} with \eqn{u_{it} = \alpha_i + \epsilon_{it}}. If \eqn{\alpha_i \neq \alpha = const.} (there exists individual heterogeneity), then pooled OLS is at least inefficient and inference on \eqn{\beta_1} is invalid. If \eqn{E[\alpha_i|X_{it}] = 0} (mean independence of individual heterogeneity \eqn{\alpha_i}), the variance components or 'random-effects' estimator provides an asymptotically efficient FGLS solution by estimating a transformed model \eqn{y_{it}-\theta y_{i.} = \beta_0 + \beta_1 (X_{it} - \theta X_{i.}) + (u_{it} - \theta u_{i.}}), where \eqn{\theta = 1 - \frac{\sigma_\alpha}{\sqrt(\sigma^2_\alpha + T \sigma^2_\epsilon)}}. An estimate of \eqn{\theta} can be obtained from the an estimate of \eqn{\hat{u}_{it}} (the residuals from the pooled model). If \eqn{E[\alpha_i|X_{it}] \neq 0}, pooled OLS is biased and inconsistent, and taking \eqn{\theta = 1} gives an unbiased and consistent fixed-effects estimator of \eqn{\beta_1}. See Examples. } \value{ \code{fbetween}/\code{B} returns \code{x} with every element replaced by its (groupwise) mean (\code{xi.}). Missing values are preserved if \code{fill = FALSE} (the default). \code{fwithin/W} returns \code{x} where every element was subtracted its (groupwise) mean (\code{x - theta * xi. + mean} or, if \code{mean = "overall.mean"}, \code{x - theta * xi. + theta * x..}). See Details. } \references{ Mundlak, Yair. 1978. On the Pooling of Time Series and Cross Section Data. \emph{Econometrica} 46 (1): 69-85. } % \author{ %% ~~who you are~~ % } % \note{ %% ~~further notes~~ % } %% ~Make other sections like Warning with \section{Warning }{\dots.} ~ \seealso{ \code{\link[=HDW]{fhdbetween/HDB and fhdwithin/HDW}}, \code{\link[=fscale]{fscale/STD}}, \code{\link{TRA}}, \link[=data-transformations]{Data Transformations}, \link[=collapse-documentation]{Collapse Overview} } \examples{ ## Simple centering and averaging head(fbetween(mtcars)) head(B(mtcars)) head(fwithin(mtcars)) head(W(mtcars)) all.equal(fbetween(mtcars) + fwithin(mtcars), mtcars) ## Groupwise centering and averaging head(fbetween(mtcars, mtcars$cyl)) head(fwithin(mtcars, mtcars$cyl)) all.equal(fbetween(mtcars, mtcars$cyl) + fwithin(mtcars, mtcars$cyl), mtcars) head(W(wlddev, ~ iso3c, cols = 9:13)) # Center the 5 series in this dataset by country head(cbind(get_vars(wlddev,"iso3c"), # Same thing done manually using fwithin.. add_stub(fwithin(get_vars(wlddev,9:13), wlddev$iso3c), "W."))) ## Using B() and W() for fixed-effects regressions: # Several ways of running the same regression with cyl-fixed effects lm(W(mpg,cyl) ~ W(carb,cyl), data = mtcars) # Centering each individually lm(mpg ~ carb, data = W(mtcars, ~ cyl, stub = FALSE)) # Centering the entire data lm(mpg ~ carb, data = W(mtcars, ~ cyl, stub = FALSE, # Here only the intercept changes mean = "overall.mean")) lm(mpg ~ carb + B(carb,cyl), data = mtcars) # Procedure suggested by # ..Mundlak (1978) - partialling out group averages amounts to the same as demeaning the data \donttest{ % No code relying on suggested package plm::plm(mpg ~ carb, mtcars, index = "cyl", model = "within") # "Proof".. } # This takes the interaction of cyl, vs and am as fixed effects lm(W(mpg,list(cyl,vs,am)) ~ W(carb,list(cyl,vs,am)), data = mtcars) lm(mpg ~ carb, data = W(mtcars, ~ cyl + vs + am, stub = FALSE)) lm(mpg ~ carb + B(carb,list(cyl,vs,am)), data = mtcars) # Now with cyl fixed effects weighted by hp: lm(W(mpg,cyl,hp) ~ W(carb,cyl,hp), data = mtcars) lm(mpg ~ carb, data = W(mtcars, ~ cyl, ~ hp, stub = FALSE)) lm(mpg ~ carb + B(carb,cyl,hp), data = mtcars) # WRONG ! Gives a different coefficient!! ## Manual variance components (random-effects) estimation res <- HDW(mtcars, mpg ~ carb)[[1]] # Get residuals from pooled OLS sig2_u <- fvar(res) sig2_e <- fvar(fwithin(res, mtcars$cyl)) T <- length(res) / fndistinct(mtcars$cyl) sig2_alpha <- sig2_u - sig2_e theta <- 1 - sqrt(sig2_alpha) / sqrt(sig2_alpha + T * sig2_e) lm(mpg ~ carb, data = W(mtcars, ~ cyl, theta = theta, mean = "overall.mean", stub = FALSE)) \donttest{ % No code relying on suggested package # A slightly different method to obtain theta... plm::plm(mpg ~ carb, mtcars, index = "cyl", model = "random") } } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{manip} % __ONLY ONE__ keyword per line collapse/man/data-transformations.Rd0000644000176200001440000001612314167161710017265 0ustar liggesusers\name{data-transformations} \alias{A6-data-transformations} \alias{data-transformations} \alias{.OPERATOR_FUN} \title{Data Transformations} % \emph{collapse} \description{ \emph{collapse} provides an ensemble of functions to perform common data transformations efficiently and user friendly: \itemize{ \item \code{\link{dapply}} \bold{applies functions to rows or columns} of matrices and data frames, preserving the data format. \item \code{\link{BY}} is an S3 generic for \bold{Split-Apply-Combine computing} and can perform aggregation as well as grouped transformations (for aggregation please also see \code{\link{collap}} and the \link[=fast-statistical-functions]{Fast Statistical Functions}). \item A set of arithmetic operators facilitates \bold{row-wise} \code{\link{\%rr\%}}, \code{\link{\%r+\%}}, \code{\link{\%r-\%}}, \code{\link{\%r*\%}}, \code{\link{\%r/\%}} and \bold{column-wise} \code{\link{\%cr\%}}, \code{\link{\%c+\%}}, \code{\link{\%c-\%}}, \code{\link{\%c*\%}}, \code{\link{\%c/\%}} \bold{replacing and sweeping operations} involving a vector and a matrix or data frame / list. Since v1.7, the operators \code{\link{\%+=\%}}, \code{\link{\%-=\%}}, \code{\link{\%*=\%}} and \code{\link{\%/=\%}} do column- and element- wise math by reference, and the function \code{\link{setop}} can also perform sweeping out rows by reference. \item \code{\link{TRA}} is a more advanced S3 generic to efficiently perform \bold{(groupwise) replacing and sweeping out of statistics}. %The basic syntax is \code{TRA(x, xag, g)} where \code{x} is data to be transformed, \code{xag} is some set of aggregate statistics to tranform \code{x} and \code{g} is an optional grouping vector for grouped transformations. Supported operations are: \tabular{lllll}{\emph{ Integer-id } \tab\tab \emph{ String-id } \tab\tab \emph{ Description } \cr % \Sexpr{"\u200B"} \Sexpr{"\u200B"} % \Sexpr{"\u200B"} \Sexpr{"\u200B"} 1 \tab\tab "replace_fill" \tab\tab replace and overwrite missing values \cr 2 \tab\tab "replace" \tab\tab replace but preserve missing values \cr 3 \tab\tab "-" \tab\tab subtract \cr 4 \tab\tab "-+" \tab\tab subtract group-statistics but add group-frequency weighted average of group statistics \cr 5 \tab\tab "/" \tab\tab divide \cr 6 \tab\tab "\%" \tab\tab compute percentages \cr 7 \tab\tab "+" \tab\tab add \cr 8 \tab\tab "*" \tab\tab multiply \cr 9 \tab\tab "\%\%" \tab\tab modulus \cr 10 \tab\tab "-\%\%" \tab\tab subtract modulus } All of \emph{collapse}'s \link[=fast-statistical-functions]{Fast Statistical Functions} have a built-in \code{TRA} argument for faster access (i.e. you can compute (groupwise) statistics and use them to transform your data with a single function call). \item \code{\link[=fscale]{fscale/STD}} is an S3 generic to perform (groupwise and / or weighted) \bold{scaling / standardizing} of data and is orders of magnitude faster than \code{\link{scale}}. \item \code{\link[=fwithin]{fwithin/W}} is an S3 generic to efficiently perform (groupwise and / or weighted) \bold{within-transformations / demeaning / centering} of data. Similarly \code{\link[=fbetween]{fbetween/B}} computes (groupwise and / or weighted) \bold{between-transformations / averages} (also a lot faster than \code{\link{ave}}). \item \code{\link[=HDW]{fhdwithin/HDW}}, shorthand for 'higher-dimensional within transform', is an S3 generic to efficiently \bold{center data on multiple groups and partial-out linear models} (possibly involving many levels of fixed effects). In other words, \code{\link[=HDW]{fhdwithin/HDW}} efficiently computes \bold{residuals} from (potentially complex) linear models. Similarly \code{\link[=HDB]{fhdbetween/HDB}}, shorthand for 'higher-dimensional between transformation', computes the corresponding means or \bold{fitted values}. %\item \code{flm} is an efficient function for bare-bones (weighted) \bold{linear model fitting}. It supports 6 different fitting methods, 4 from base R, and 2 utilizing the \emph{RcppArmadillo} or \emph{RcppEigen} packages. \item \code{\link[=flag]{flag/L/F}}, \code{\link[=fdiff]{fdiff/D/Dlog}} and \code{\link[=fgrowth]{fgrowth/G}} are S3 generics to compute sequences of \bold{lags / leads} and suitably lagged and iterated (quasi-, log-) \bold{differences} and \bold{growth rates} on time series and panel data. \code{\link{fcumsum}} flexibly computes cumulative sums. More in \link[=time-series-panel-series]{Time Series and Panel Series}. \item \code{STD, W, B, HDW, HDB, L, D, Dlog} and \code{G} are parsimonious wrappers around the \code{f-} functions above representing the corresponding transformation 'operators'. They have additional capabilities when applied to data-frames (i.e. variable selection, formula input, auto-renaming and id-variable preservation), and are easier to employ in regression formulas, but are otherwise identical in functionality. } } \section{Table of Functions}{ \tabular{lllll}{\emph{ Function / S3 Generic } \tab\tab \emph{ Methods } \tab\tab \emph{ Description } \cr % \Sexpr{"\u200B"} \Sexpr{"\u200B"} % \Sexpr{"\u200B"} \Sexpr{"\u200B"} \code{\link{dapply}} \tab\tab No methods, works with matrices and data frames \tab\tab Apply functions to rows or columns \cr \code{\link{BY}} \tab\tab \code{default, matrix, data.frame, grouped_df} \tab\tab Split-Apply-Combine computing \cr \code{\link[=arithmetic]{\%(r/c)(r/+/-/*//)\%}} \tab\tab No methods, works with matrices and data frames / lists \tab\tab Row- and column-arithmetic \cr \code{\link{TRA}} \tab\tab \code{default, matrix, data.frame, grouped_df} \tab\tab Replace and sweep out statistics \cr \code{\link[=fscale]{fscale/STD}} \tab\tab \code{default, matrix, data.frame, pseries, pdata.frame, grouped_df} \tab\tab Scale / standardize data \cr \code{\link[=fwithin]{fwithin/W}} \tab\tab \code{default, matrix, data.frame, pseries, pdata.frame, grouped_df} \tab\tab Demean / center data \cr \code{\link[=fbetween]{fbetween/B}} \tab\tab \code{default, matrix, data.frame, pseries, pdata.frame, grouped_df} \tab\tab Compute means / average data \cr \code{\link[=HDW]{fhdwithin/HDW}} \tab\tab \code{default, matrix, data.frame, pseries, pdata.frame} \tab\tab High-dimensional centering and lm residuals \cr \code{\link[=HDB]{fhdbetween/HDB}} \tab\tab \code{default, matrix, data.frame, pseries, pdata.frame} \tab\tab High-dimensional averages and lm fitted values \cr % \code{\link{flm}} \tab\tab No methods, for matrices \tab\tab Linear model fitting \cr \code{\link[=flag]{flag/L/F}}, \code{\link[=fdiff]{fdiff/D/Dlog}}, \code{\link[=fdiff]{fgrowth/G}}, \code{\link{fcumsum}} \tab\tab \code{default, matrix, data.frame, pseries, pdata.frame, grouped_df} \tab\tab (Sequences of) lags / leads, differences, growth rates and cumulative sums } } \seealso{ \link[=collapse-documentation]{Collapse Overview}, \link[=fast-statistical-functions]{Fast Statistical Functions}, \link[=time-series-panel-series]{Time Series and Panel Series} } \keyword{manip} \keyword{documentation} collapse/man/across.Rd0000644000176200001440000001514214172533272014421 0ustar liggesusers\name{across} \alias{across} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Apply Functions Across Multiple Columns } \description{ \code{across()} can be used inside \code{\link{fmutate}} and \code{\link{fsummarise}} to apply one or more functions to a selection of columns. It is overall very similar to \code{dplyr::across}, but does not support some \code{rlang} features, has some additional features (arguments), and is optimized to work with \emph{collapse}'s, \code{\link{.FAST_FUN}}, yielding much faster computations. } \usage{ across(.cols = NULL, .fns, ..., .names = NULL, .apply = "auto", .transpose = "auto") # acr(...) can be used to abbreviate across(...) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{.cols}{select columns using column names and expressions (e.g. \code{a:b} or \code{c(a, b, c:f)}), column indices, logical vectors, or functions yielding a logical value e.g. \code{is.numeric}. \code{NULL} applies functions to all columns except for grouping columns.} \item{.fns}{A function, character vector of functions or list of functions. Vectors / lists can be named to yield alternative names in the result (see \code{.names}). This argument is evaluated inside \code{substitute()}, and the content (not the names of vectors/lists) is checked against \code{.FAST_FUN} and \code{.OPERATOR_FUN}. Matching functions receive vectorized execution, other functions are applied to the data in a standard way.} \item{\dots}{further arguments to \code{.fns}. Arguments are evaluated in the data environment and split by groups as well (for non-vectorized functions, if of the same length as the data).} \item{.names}{controls the naming of computed columns. \code{NULL} generates names of the form \code{coli_funj} if multiple functions are used. \code{.names = TRUE} enables this for a single function, \code{.names = FALSE} disables it for multiple functions (sensible for functions such as \code{.OPERATOR_FUN} that rename columns (if \code{.apply = FALSE})). It is also possible to supply a function with two arguments for column and function names e.g. \code{function(c, f) paste0(f, "_", c)}. Finally, you can supply a custom vector of names which must match \code{length(.cols) * length(.fns)}.} \item{.apply}{controls whether functions are applied column-by-column (\code{TRUE}) or to multiple columns at once (\code{FALSE}). The default, \code{"auto"}, does the latter for vectorized functions, which have an efficient data frame method. It can also be sensible to use \code{.apply = FALSE} for non-vectorized functions, especially multivariate functions like \code{\link{lm}} or \code{\link{pwcor}}, or functions renaming the data. See Examples. } \item{.transpose}{with multiple \code{.fns}, \code{.transpose} controls whether the result is ordered first by column, then by function (\code{TRUE}), or vice-versa (\code{FALSE}). \code{"auto"} does the former if all functions yield results of the same dimensions (dimensions may differ if \code{.apply = FALSE}). See Examples.} } \note{ \code{across} does not support \emph{purr}-style lambdas, and does not support \code{dplyr}-style predicate functions e.g. \code{across(where(is.numeric), sum)}, simply use \code{across(is.numeric, sum)}. In contrast to \code{dplyr}, you can also compute on grouping columns. In general, my mission with \code{collapse} is not to create a \code{dplyr}-clone, but to take some of the useful features and make them robust and fast using base R and C/C++, with the aim of having a stable API. So don't ask me to implement the latest \emph{dplyr} feature, unless you firmly believe it is very useful and will be around 10 years from now. } \seealso{ \code{\link{fsummarise}}, \code{\link{fmutate}}, \link[=fast-data-manipulation]{Fast Data Manipulation}, \link[=collapse-documentation]{Collapse Overview} } \examples{ # Basic (Weighted) Summaries fsummarise(wlddev, across(PCGDP:GINI, fmean, w = POP)) library(magrittr) # Note: Used because |> is not available on older R versions wlddev \%>\% fgroup_by(region, income) \%>\% fsummarise(across(PCGDP:GINI, fmean, w = POP)) # Note that for these we don't actually need across... fselect(wlddev, PCGDP:GINI) \%>\% fmean(w = wlddev$POP, drop = FALSE) wlddev \%>\% fgroup_by(region, income) \%>\% fselect(PCGDP:GINI, POP) \%>\% fmean(POP, keep.w = FALSE) collap(wlddev, PCGDP + LIFEEX + GINI ~ region + income, w = ~ POP, keep.w = FALSE) # But if we want to use some base R function that reguires argument splitting... wlddev \%>\% na_omit(cols = "POP") \%>\% fgroup_by(region, income) \%>\% fsummarise(across(PCGDP:GINI, weighted.mean, w = POP, na.rm = TRUE)) # Or if we want to apply different functions... wlddev \%>\% fgroup_by(region, income) \%>\% fsummarise(across(PCGDP:GINI, list(mu = fmean, sd = fsd), w = POP), POP_sum = fsum(POP), OECD = fmean(OECD)) # Note that the above still detects fmean as a fast function, the names of the list # are irrelevant, but the function name must be typed or passed as a character vector, # Otherwise functions will be executed by groups e.g. function(x) fmean(x) won't vectorize \donttest{ # Or we want to do more advanced things.. # Such as nesting data frames.. qTBL(wlddev) \%>\% fgroup_by(region, income) \%>\% fsummarise(across(c(PCGDP, LIFEEX, ODA), function(x) list(Nest = list(x)), .apply = FALSE)) # Or linear models.. qTBL(wlddev) \%>\% fgroup_by(region, income) \%>\% fsummarise(across(c(PCGDP, LIFEEX, ODA), function(x) list(Mods = list(lm(PCGDP ~., x))), .apply = FALSE)) # Or cumputing grouped correlation matrices qTBL(wlddev) \%>\% fgroup_by(region, income) \%>\% fsummarise(across(c(PCGDP, LIFEEX, ODA), function(x) qDF(pwcor(x), "Variable"), .apply = FALSE)) } # Here calculating 1- and 10-year lags and growth rates of these variables qTBL(wlddev) \%>\% fgroup_by(country) \%>\% fmutate(across(c(PCGDP, LIFEEX, ODA), list(L, G), n = c(1, 10), t = year, .names = FALSE)) # Same but variables in different order qTBL(wlddev) \%>\% fgroup_by(country) \%>\% fmutate(across(c(PCGDP, LIFEEX, ODA), list(L, G), n = c(1, 10), t = year, .names = FALSE, .transpose = FALSE)) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory (show via RShowDoc("KEYWORDS")): \keyword{manip} % \keyword{ ~kwd2 } % Use only one keyword per line. % For non-standard keywords, use \concept instead of \keyword: % \concept{ ~cpt1 } % \concept{ ~cpt2 } % Use only one concept per line. collapse/man/ffirst_flast.Rd0000644000176200001440000001030614175334642015615 0ustar liggesusers\name{ffirst-flast} \alias{ffirst} \alias{ffirst.default} \alias{ffirst.matrix} \alias{ffirst.data.frame} \alias{ffirst.grouped_df} \alias{flast} \alias{flast.default} \alias{flast.matrix} \alias{flast.data.frame} \alias{flast.grouped_df} \title{Fast (Grouped) First and Last Value for Matrix-Like Objects} % Vectors, Matrix and Data Frame Columns} \description{ \code{ffirst} and \code{flast} are S3 generic functions that (column-wise) returns the first and last values in \code{x}, (optionally) grouped by \code{g}. The \code{\link{TRA}} argument can further be used to transform \code{x} using its (groupwise) first and last values. } \usage{ ffirst(x, \dots) flast(x, \dots) \method{ffirst}{default}(x, g = NULL, TRA = NULL, na.rm = TRUE, use.g.names = TRUE, \dots) \method{flast}{default}(x, g = NULL, TRA = NULL, na.rm = TRUE, use.g.names = TRUE, \dots) \method{ffirst}{matrix}(x, g = NULL, TRA = NULL, na.rm = TRUE, use.g.names = TRUE, drop = TRUE, \dots) \method{flast}{matrix}(x, g = NULL, TRA = NULL, na.rm = TRUE, use.g.names = TRUE, drop = TRUE, \dots) \method{ffirst}{data.frame}(x, g = NULL, TRA = NULL, na.rm = TRUE, use.g.names = TRUE, drop = TRUE, \dots) \method{flast}{data.frame}(x, g = NULL, TRA = NULL, na.rm = TRUE, use.g.names = TRUE, drop = TRUE, \dots) \method{ffirst}{grouped_df}(x, TRA = NULL, na.rm = TRUE, use.g.names = FALSE, keep.group_vars = TRUE, \dots) \method{flast}{grouped_df}(x, TRA = NULL, na.rm = TRUE, use.g.names = FALSE, keep.group_vars = TRUE, \dots) } \arguments{ \item{x}{a vector, matrix, data frame or grouped data frame (class 'grouped_df').} \item{g}{a factor, \code{\link{GRP}} object, atomic vector (internally converted to factor) or a list of vectors / factors (internally converted to a \code{\link{GRP}} object) used to group \code{x}.} \item{TRA}{an integer or quoted operator indicating the transformation to perform: 1 - "replace_fill" | 2 - "replace" | 3 - "-" | 4 - "-+" | 5 - "/" | 6 - "\%" | 7 - "+" | 8 - "*" | 9 - "\%\%" | 10 - "-\%\%". See \code{\link{TRA}}.} \item{na.rm}{logical. \code{TRUE} skips missing values and returns the first / last non-missing value i.e. if the first (1) / last (n) value is \code{NA}, take the second (2) / second-to-last (n-1) value etc..} \item{use.g.names}{logical. Make group-names and add to the result as names (default method) or row-names (matrix and data frame methods). No row-names are generated for \emph{data.table}'s.} \item{drop}{\emph{matrix and data.frame method:} Logical. \code{TRUE} drops dimensions and returns an atomic vector if \code{g = NULL} and \code{TRA = NULL}.} \item{keep.group_vars}{\emph{grouped_df method:} Logical. \code{FALSE} removes grouping variables after computation.} \item{\dots}{arguments to be passed to or from other methods.} } \value{ \code{ffirst} returns the first value in \code{x}, grouped by \code{g}, or (if \code{\link{TRA}} is used) \code{x} transformed by its first value, grouped by \code{g}. Similarly \code{flast} returns the last value in \code{x}, \dots } \seealso{ \link[=fast-statistical-functions]{Fast Statistical Functions}, \link[=collapse-documentation]{Collapse Overview} } \examples{ ## default vector method ffirst(airquality$Ozone) # Simple first value ffirst(airquality$Ozone, airquality$Month) # Grouped first value ffirst(airquality$Ozone, airquality$Month, na.rm = FALSE) # Grouped first, but without skipping initial NA's ## data.frame method ffirst(airquality) ffirst(airquality, airquality$Month) ffirst(airquality, airquality$Month, na.rm = FALSE) # Again first Ozone measurement in month 6 is NA ## matrix method aqm <- qM(airquality) ffirst(aqm) ffirst(aqm, airquality$Month) # etc.. \donttest{ % No code relying on suggested package ## method for grouped data frames - created with dplyr::group_by or fgroup_by library(dplyr) airquality \%>\% group_by(Month) \%>\% ffirst() airquality \%>\% group_by(Month) \%>\% select(Ozone) \%>\% ffirst(na.rm = FALSE) } # Note: All examples generalize to flast. } \keyword{univar} \keyword{manip} collapse/man/extract_list.Rd0000644000176200001440000001453414167160635015643 0ustar liggesusers\name{get_elem} % \alias{extract_list} \alias{atomic_elem} \alias{atomic_elem<-} \alias{list_elem} \alias{list_elem<-} \alias{reg_elem} \alias{irreg_elem} \alias{has_elem} \alias{get_elem} %- Also NEED an '\alias' for EACH other topic documented here. \title{Find and Extract / Subset List Elements} \description{ A suite of functions to subset or extract from (potentially complex) lists and list-like structures. Subsetting may occur according to certain data types, using identifier functions, element names or regular expressions to search the list for certain objects. \itemize{ \item \code{atomic_elem} and \code{list_elem} are non-recursive functions to extract and replace the atomic and sub-list elements at the top-level of the list tree. \item \code{reg_elem} is the recursive equivalent of \code{atomic_elem} and returns the 'regular' part of the list - with atomic elements in the final nodes. \code{irreg_elem} returns all the non-regular elements (i.e. call and terms objects, formulas, etc\dots). See Examples. \item \code{get_elem} returns the part of the list responding to either an identifier function, regular expression or exact element names, or indices applied to all final objects. \code{has_elem} checks for the existence of the searched element and returns \code{TRUE} if a match is found. See Examples. } } \usage{ ## Non-recursive (top-level) subsetting and replacing atomic_elem(l, return = "sublist", keep.class = FALSE) atomic_elem(l) <- value list_elem(l, return = "sublist", keep.class = FALSE) list_elem(l) <- value ## Recursive separation of regular (atomic) and irregular (non-atomic) parts reg_elem(l, recursive = TRUE, keep.tree = FALSE, keep.class = FALSE) irreg_elem(l, recursive = TRUE, keep.tree = FALSE, keep.class = FALSE) ## Extract elements using a function or regular expression get_elem(l, elem, recursive = TRUE, DF.as.list = FALSE, keep.tree = FALSE, keep.class = FALSE, regex = FALSE, \dots) ## Check for the existence of elements has_elem(l, elem, recursive = TRUE, DF.as.list = FALSE, regex = FALSE, \dots) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{l}{a list.} \item{value}{a list of the same length as the extracted subset of \code{l}.} \item{elem}{a function returning \code{TRUE} or \code{FALSE} when applied to elements of \code{l}, or a character vector of element names or regular expressions (if \code{regex = TRUE}). \code{get_elem} also supports a vector or indices which will be used to subset all final objects.} \item{return}{an integer or string specifying what the selector function should return. The options are: \tabular{lllll}{\emph{ Int. } \tab\tab \emph{ String } \tab\tab \emph{ Description } \cr 1 \tab\tab "sublist" \tab\tab subset of data frame (default) \cr 2 \tab\tab "names" \tab\tab column names \cr 3 \tab\tab "indices" \tab\tab column indices \cr 4 \tab\tab "named_indices" \tab\tab named column indices \cr 5 \tab\tab "logical" \tab\tab logical selection vector \cr 6 \tab\tab "named_logical" \tab\tab named logical vector \cr } \emph{Note}: replacement functions only replace data, However column names are replaced together with the data. } \item{recursive}{logical. Should the list search be recursive (i.e. go though all the elements), or just at the top-level?} \item{DF.as.list}{logical. \code{TRUE} treats data frames like (sub-)lists; \code{FALSE} like atomic elements.} \item{keep.tree}{logical. \code{TRUE} always returns the entire list tree leading up to all matched results, while \code{FALSE} drops the top-level part of the tree if possible.} \item{keep.class}{logical. For classed objects: Should the class be retained?} \item{regex}{logical. Should regular expression search be used on the list names, or only exact matches?} \item{\dots}{further arguments to \code{grep} (if \code{regex = TRUE}).} } \details{ For a lack of better terminology, \emph{collapse} defines 'regular' R objects as objects that are either atomic or a list. \code{reg_elem} with \code{recursive = TRUE} extracts the subset of the list tree leading up to atomic elements in the final nodes. This part of the list tree is unlistable - calling \code{is_unlistable(reg_elem(l))} will be \code{TRUE} for all lists \code{l}. Conversely, all elements left behind by \code{reg_elem} will be picked up be \code{irreg_elem} (if available). Thus \code{is_unlistable(irreg_elem(l))} is always \code{FALSE} for lists with irregular elements (otherwise \code{irreg_elem} returns an empty list). \cr If \code{keep.tree = TRUE}, \code{reg_elem}, \code{irreg_elem} and \code{get_elem} always return the entire list tree, but cut off all of the branches not leading to the desired result. If \code{keep.tree = FALSE}, top-level parts of the tree are omitted so far this is possible. For example in a nested list with three levels and one data-matrix in one of the final branches, \code{get_elem(l, is.matrix, keep.tree = TRUE)} will return a list (\code{lres}) of depth 3, from which the matrix can be accessed as \code{lres[[1]][[1]][[1]]}. This however does not make much sense. \code{get_elem(l, is.matrix, keep.tree = FALSE)} will therefore figgure out that it can drop the entire tree and return just the matrix. \code{keep.tree = FALSE} makes additional optimizations if matching elements are at far-apart corners in a nested structure, by only preserving the hierarchy if elements are above each other on the same branch. Thus for a list \code{l <- list(list(2,list("a",1)),list(1,list("b",2)))} calling \code{get_elem(l, is.character)} will just return \code{list("a","b")}. } % \value{ % } \seealso{ \link[=list-processing]{List Processing}, \link[=collapse-documentation]{Collapse Overview} } \examples{ m <- qM(mtcars) get_elem(list(list(list(m))), is.matrix) get_elem(list(list(list(m))), is.matrix, keep.tree = TRUE) l <- list(list(2,list("a",1)),list(1,list("b",2))) has_elem(l, is.logical) has_elem(l, is.numeric) get_elem(l, is.character) get_elem(l, is.character, keep.tree = TRUE) l <- lm(mpg ~ cyl + vs, data = mtcars) str(reg_elem(l)) str(irreg_elem(l)) get_elem(l, is.matrix) get_elem(l, "residuals") get_elem(l, "fit", regex = TRUE) has_elem(l, "tol") get_elem(l, "tol") } \keyword{list} \keyword{manip} collapse/man/collapse-options.Rd0000644000176200001440000001236714170153362016424 0ustar liggesusers\name{collapse-options} \alias{collapse-options} \title{\emph{collapse} Package Options} \description{ \itemize{ \item \code{option("collapse_unused_arg_action")} regulates how generic functions (such as the \link[=fast-statistical-functions]{Fast Statistical Functions}) in the package react when an unknown argument is passed to a method. The default action is \code{"warning"} which issues a warning. Other options are \code{"error"}, \code{"message"} or \code{"none"}, whereby the latter enables silent swallowing of such arguments. \item \code{option("collapse_mask")} can be used to create additional functions in the \emph{collapse} namespace when loading the package, which will mask some existing base R and \emph{dplyr} functions. In particular, \emph{collapse} provides a large number of functions that start with 'f' e.g. \code{fsubset}, \code{ftransform}, \code{fdroplevels} etc.. Specifying \code{options(collapse_mask = c("fsubset", "ftransform", "fdroplevels"))} before loading the package will make additional functions \code{subset}, \code{transform}, and \code{droplevels} available to the user, and mask the corresponding base R functions when the package is attached. In general, all functions starting with 'f' can be passed to the option. There are also a couple of keywords that you can specify to add groups of functions: \itemize{ \item \code{"manip"} adds data manipulation functions: \code{fsubset, ftransform, ftransform<-, ftransformv, fcompute, fcomputev, fselect, fselect<-, fgroup_by, fgroup_vars, fungroup, fsummarise, fmutate, frename} \item \code{"helper"} adds the functions: \code{fdroplevels}, \code{finteraction}, \code{funique}, \code{fnlevels}, \code{fnrow} and \code{fncol}. % fdim not because of infinite recursion \item \code{"fast-fun"} adds the functions contained in the macro: \code{.FAST_FUN}. \item \code{"fast-stat-fun"} adds the functions contained in the macro: \code{.FAST_STAT_FUN}. \item \code{"fast-trfm-fun"} adds the functions contained in: \code{setdiff(.FAST_FUN, .FAST_STAT_FUN)}. \item \code{"all"} turns on all of the above. } Note that none of these options will impact internal \emph{collapse} code, but they may change the way your programs run. \code{"manip"} is probably the safest option to start with. Specifying \code{"fast-fun"}, \code{"fast-stat-fun"}, \code{"fast-trfm-fun"} or \code{"all"} are ambitious as they replace basic R functions like \code{sum} and \code{max}, introducing \emph{collapse}'s \code{na.rm = TRUE} default and different behavior for matrices and data frames, and these options also changes some internal macros so that base R functions like \code{sum} or \code{max} called inside \code{fsummarise}, \code{fmutate} or \code{collap} will also receive vectorized execution. In other words, if you put \code{options(collapse_mask = "all")} before loading the package, and you have a collapse-compatible line of dplyr code like \code{wlddev |> group_by(region, income) |> summarise(across(PCGDP:POP, sum))}, this will now receive fully optimized execution. Note however that because of \code{collapse}'s \code{na.rm = TRUE} default, the result will be different unless you add \code{na.rm = FALSE}. In General, this option is for your convenience, if you want to write visually more appealing code or you want to translate existing dplyr codes to \emph{collapse}. Use with care! For production code I generally recommend not using it. % Note also that I have not yet investigated in much detail the impact on internal codes of replacing basic R functions like \code{sum}, \code{min} and \code{max} in the package. In terms of unit testing it seems to be quite ok (a number of tests break where I compare fast statistical functions to base functions because of the \code{na.rm = TRUE} default, but otherwise no major issues arise). In general, while this option is cool and can be of great help in translating existing \emph{dplyr} codes to \emph{collapse}, it is experimental and not the best idea for new production code. \item \code{option("collapse_F_to_FALSE")}, if set to \code{TRUE}, replaces the lead operator \code{F} in the package with a value \code{FALSE} when loading the package, which solves issues arising from the use of \code{F} as a shortcut for \code{FALSE} in R codes when \emph{collapse} is attached. Note that \code{F} is just a value in the \emph{base} package namespace, and it should NOT be used in production codes, precisely because users can overwrite it by assignment. An alternative solution to invoking this option would also just be assigning a value \code{F <- FALSE} in your global environment. \item \code{option("collapse_DT_alloccol")} sets how many empty columns \emph{collapse} data manipulation functions like \code{ftransform} allocate when taking a shallow copy of \emph{data.table}'s. The default is \code{100L}. Note that the \emph{data.table} default is \code{getOption("datatable.alloccol") = 1024L}. I chose a lower default because shallow copies are taken by each data manipulation function if you manipulate \emph{data.table}'s with collapse, and the cost increases with the number of overallocated columns. With 100 columns, the cost is 2-5 microseconds per copy. } } \seealso{ \link[=collapse-documentation]{Collapse Overview}, \link{collapse-package} } \keyword{documentation} collapse/man/list-processing.Rd0000644000176200001440000001164214170154047016252 0ustar liggesusers\name{list-processing} \alias{A8-list-processing} \alias{list-processing} \title{List Processing} % \emph{collapse} \description{ \emph{collapse} provides the following set of functions to efficiently work with lists of R objects: \itemize{ \item \bold{Search and Identification}\itemize{ \item \code{\link{is_unlistable}} checks whether a (nested) list is composed of atomic objects in all final nodes, and thus unlistable to an atomic vector using \code{\link{unlist}}. \item \code{\link{ldepth}} determines the level of nesting of the list (i.e. the maximum number of nodes of the list-tree). \item \code{\link{has_elem}} searches elements in a list using element names, regular expressions applied to element names, or a function applied to the elements, and returns \code{TRUE} if any matches were found. } \item \bold{Subsetting} \itemize{ \item \code{\link{atomic_elem}} examines the top-level of a list and returns a sublist with the atomic elements. Conversely \code{\link{list_elem}} returns the sublist of elements which are themselves lists or list-like objects. \item \code{\link{reg_elem}} and \code{\link{irreg_elem}} are recursive versions of the former. \code{\link{reg_elem}} extracts the 'regular' part of the list-tree leading to atomic elements in the final nodes, while \code{\link{irreg_elem}} extracts the 'irregular' part of the list tree leading to non-atomic elements in the final nodes. (\emph{Tip}: try calling both on an \code{lm} object). Naturally for all lists \code{l}, \code{is_unlistable(reg_elem(l))} evaluates to \code{TRUE}\dots \item \code{\link{get_elem}} extracts elements from a list using element names, regular expressions applied to element names, a function applied to the elements, or element-indices used to subset the lowest-level sub-lists. by default the result is presented as a simplified list containing all matching elements. With the \code{keep.tree} option however \code{\link{get_elem}} can also be used to subset lists i.e. maintain the full tree but cut off non-matching branches. } \item \bold{Splitting and Transposition} \itemize{ \item \code{\link{rsplit}} recursively splits a vector or data frame into subsets according to combinations of (multiple) vectors / factors - by default returning a (nested) list. If \code{flatten = TRUE}, the list is flattened yielding the same result as \code{\link{split}}. \code{rsplit} is also faster than \code{\link{split}}, particularly for data frames. \item \code{\link{t_list}} efficiently transposes nested lists of lists, such as those obtained from splitting a data frame by multiple variables using \code{\link{rsplit}}. } \item \bold{Apply Functions} \itemize{ \item \code{\link{rapply2d}} is a recursive version of \code{\link{lapply}} with two key differences to \code{\link{rapply}}: (1) Data frames are considered as atomic objects, not as (sub-)lists, and (2) the result is not simplified. } \item \bold{Unlisting / Row-Binding} \itemize{ \item \code{\link{unlist2d}} efficiently unlists unlistable lists in 2-dimensions and creates a data frame (or \emph{data.table}) representation of the list (unlike \code{\link{unlist}} which returns an atomic vector). This is done by recursively flattening and row-binding R objects in the list (using \code{data.table::rbindlist}) while creating identifier columns for each level of the list-tree and (optionally) saving the row-names of the objects in a separate column. \code{\link{unlist2d}} can thus also be understood as a recursive generalization of \code{do.call(rbind, l)}, for lists of vectors, data frames, arrays or heterogeneous objects. } } } \section{Table of Functions}{ \tabular{lll}{\emph{ Function } \tab\tab \emph{ Description } \cr % \code{\link{is.regular}} \tab\tab \code{function(x) is.atomic(x) || is.list(x)} \cr \code{\link{is_unlistable}} \tab\tab Checks if list is unlistable \cr \code{\link{ldepth}} \tab\tab Level of nesting / maximum depth of list-tree \cr \code{\link{has_elem}} \tab\tab Checks if list contains a certain element \cr \code{\link{get_elem}} \tab\tab Subset list / extract certain elements \cr \code{\link{atomic_elem}} \tab\tab Top-level subset atomic elements \cr \code{\link{list_elem}} \tab\tab Top-level subset list/list-like elements \cr \code{\link{reg_elem}} \tab\tab Recursive version of \code{atomic_elem}: Subset / extract 'regular' part of list \cr \code{\link{irreg_elem}} \tab\tab Subset / extract non-regular part of list \cr \code{\link{rsplit}} \tab\tab Recursively split vectors or data frames / lists \cr \code{\link{t_list}} \tab\tab Transpose lists of lists \cr \code{\link{rapply2d}} \tab\tab Recursively apply functions to lists of data objects \cr \code{\link{unlist2d}} \tab\tab Recursively unlist/row-bind lists of data objects in 2D, to data frame or \emph{data.table} \cr } } \seealso{ \link[=collapse-documentation]{Collapse Overview} } \keyword{list} \keyword{manip} \keyword{documentation} collapse/man/unlist2d.Rd0000644000176200001440000001642714167160635014705 0ustar liggesusers\name{unlist2d} \alias{unlist2d} \title{ Recursive Row-Binding / Unlisting in 2D - to Data Frame } \description{ \code{unlist2d} efficiently unlists lists of regular R objects (objects built up from atomic elements) and creates a data frame representation of the list through recursive flattening and intelligent row-binding operations. It is a full 2-dimensional generalization of \code{\link{unlist}}, but best understood as a recursive generalization of \code{do.call(rbind, ...)}. This function is a powerful tool to create a tidy data frame representation from (nested) lists of vectors, data frames, matrices, arrays or heterogeneous objects. % (i.e. unlisting happens via recursive flattening and intelligent row-binding of objects, see Details and Examples). } \usage{ unlist2d(l, idcols = ".id", row.names = FALSE, recursive = TRUE, id.factor = FALSE, DT = FALSE) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{l}{a unlistable list (with atomic elements in all final nodes, see \code{\link{is_unlistable}}).} \item{idcols}{a character stub or a vector of names for id-columns automatically added - one for each level of nesting in \code{l}. By default the stub is \code{".id"}, so columns will be of the form \code{".id.1", ".id.2",} etc... . if \code{idcols = TRUE}, the stub is also set to \code{".id"}. If \code{idcols = FALSE}, id-columns are omitted. The content of the id columns are the list names, or (if missing) integers for the list elements. Missing elements in asymmetric nested structures are filled up with \code{NA}. See Examples. } \item{row.names}{\code{TRUE} extracts row names from all the objects in \code{l} (where available) and adds them to the output in a column named \code{"row.names"}. Alternatively, a column name i.e. \code{row.names = "file"} can be supplied. For plain matrices in \code{l}, integer row names are generated. } \item{recursive}{logical. if \code{FALSE}, only process the lowest (deepest) level of \code{l}.} \item{id.factor}{if \code{TRUE} and \code{idcols != FALSE}, create id columns as factors instead of character or integer vectors. Alternatively it is possible to specify \code{id.factor = "ordered"} to generate ordered factor id's. This is useful if id's are used for further analysis e.g. as inputs to \code{ggplot2}. } \item{DT}{logical. \code{TRUE} returns a \emph{data.table}, not a data.frame.} } \details{ The data frame representation created by \code{unlist2d} is built as follows: \itemize{ \item Recurse down to the lowest level of the list-tree, data frames are exempted and treated as a final elements. \item Identify the objects, if they are vectors, matrices or arrays convert them to data frame (in the case of atomic vectors each element becomes a column). \item Row-bind these data frames using \emph{data.table}'s \code{rbindlist} function. Columns are matched by name. If the number of columns differ, fill empty spaces with \code{NA}'s. If \code{idcols != FALSE}, create id-columns on the left, filled with the object names or indices (if the (sub-)list is unnamed). If \code{row.names != FALSE}, store row names of the objects (if available) in a separate column. \item Move up to the next higher level of the list-tree and repeat: Convert atomic objects to data frame and row-bind while matching all columns and filling unmatched ones with \code{NA}'s. Create another id-column for each level of nesting passed through. If the list-tree is asymmetric, fill empty spaces in lower-level id columns with \code{NA}'s. } The result of this iterative procedure is a single data frame containing on the left side id-columns for each level of nesting (from higher to lower level), followed by a column containing all the row.names of the objects (if \code{row.names != FALSE}), followed by the object columns, matched at each level of recursion. Optimal results are of course obtained with symmetric lists of arrays, matrices or data frames, which \code{unlist2d} efficiently binds into a beautiful data frame ready for plotting or further analysis. See examples below. } \value{ A data frame or (if \code{DT = TRUE}) a \emph{data.table}. } % \references{ %% ~put references to the literature/web site here ~ % } % \author{ %% ~~who you are~~ % } \note{ For lists of data frames \code{unlist2d} works just like \code{data.table::rbindlist(l, use.names = TRUE, fill = TRUE, idcol = ".id")} (also the same speed), however for lists of lists \code{unlist2d} does not produce the same output as \code{data.table::rbindlist}. } %% ~Make other sections like Warning with \section{Warning }{....} ~ \seealso{ \code{\link{rsplit}}, \code{\link{rapply2d}}, \link[=list-processing]{List Processing}, \link[=collapse-documentation]{Collapse Overview} } \examples{ ## Basic Examples: l <- list(mtcars, list(mtcars, mtcars)) tail(unlist2d(l)) unlist2d(rapply2d(l, fmean)) l = list(a = qM(mtcars[1:8]), b = list(c = mtcars[4:11], d = list(e = mtcars[2:10], f = mtcars))) tail(unlist2d(l, row.names = TRUE)) unlist2d(rapply2d(l, fmean)) unlist2d(rapply2d(l, fmean), recursive = FALSE) ## Groningen Growth and Development Center 10-Sector Database head(GGDC10S) # See ?GGDC10S namlab(GGDC10S, class = TRUE) # Panel-Summarize this data by Variable (Emloyment and Value Added) l <- qsu(GGDC10S, by = ~ Variable, # Output as list (instead of 4D array) pid = ~ Variable + Country, cols = 6:16, array = FALSE) str(l, give.attr = FALSE) # A list of 2-levels with matrices of statistics head(unlist2d(l)) # Default output, missing the variables (row-names) head(unlist2d(l, row.names = TRUE)) # Here we go, but this is still not very nice head(unlist2d(l, idcols = c("Sector","Trans"), # Now this is looking pretty good row.names = "Variable")) dat <- unlist2d(l, c("Sector","Trans"), # Id-columns can also be generated as factors "Variable", id.factor = TRUE) str(dat) # Split this sectoral data, first by Variable (Emloyment and Value Added), then by Country sdat <- rapply2d(split(GGDC10S[c(1,6:16)], GGDC10S$Variable), function(x) split(x[-1],x[[1]])) # Compute pairwise correlations between sectors and recombine: dat <- unlist2d(rapply2d(sdat, pwcor), idcols = c("Variable","Country"), row.names = "Sector") head(dat) plot(hclust(as.dist(1-pwcor(dat[-(1:3)])))) # Using corrs. as distance metric to cluster sectors # Together with other functions like psmat, unlist2d can also effectively help reshape data: head(unlist2d(psmat(subset(GGDC10S, Variable == "VA"), ~Country, ~Year, cols = 6:16, array = FALSE), idcols = "Sector", row.names = "Country"), 2) } % # We can also examine the correlations of Growth rates of VA in each sector across countries % dat <- G(subset(GGDC10S, Variable == "VA"),1,1, ~ Country, ~Year, cols = 6:16) % dat <- psmat(dat, ~ Country, ~Year) % plot(dat, legend = TRUE) % dat[dat > 100] = NA # remove outliers % plot(dat, legend = TRUE) % sort(apply(dat, 3, function(x) fmean.default(pwcor(x)))) % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{manip} % __ONLY ONE__ keyword per line % use one of RShowDoc("KEYWORDS") \keyword{list} collapse/man/ldepth.Rd0000644000176200001440000000323314167160635014410 0ustar liggesusers\name{ldepth} \alias{ldepth} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Determine the Depth / Level of Nesting of a List } \description{ \code{ldepth} provides the depth of a list or list-like structure. } \usage{ ldepth(l, DF.as.list = FALSE) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{l}{a list.} \item{DF.as.list}{logical. \code{TRUE} treats data frames like (sub-)lists; \code{FALSE} like atomic elements.} } \details{ The depth or level or nesting of a list or list-like structure (e.g. a classed object) is found by recursing down to the bottom of the list and adding an integer count of 1 for each level passed. For example the depth of a data frame is 1. If a data frame has list-columns, the depth is 2. However for reasons of efficiency, if \code{l} is not a data frame and \code{DF.as.list = FALSE}, data frames found inside \code{l} will not be checked for list column's but assumed to have a depth of 1. } \value{ A single integer indicating the depth of the list. } % \references{ %% ~put references to the literature/web site here ~ % } % \author{ %% ~~who you are~~ % } % \note{ %% ~~further notes~~ % } %% ~Make other sections like Warning with \section{Warning }{\dots.} ~ \seealso{ \code{\link{is_unlistable}}, \code{\link{has_elem}}, \link[=list-processing]{List Processing}, \link[=collapse-documentation]{Collapse Overview} } \examples{ l <- list(1, 2) ldepth(l) l <- list(1, 2, mtcars) ldepth(l) ldepth(l, DF.as.list = FALSE) l <- list(1, 2, list(4, 5, list(6, mtcars))) ldepth(l) ldepth(l, DF.as.list = FALSE) } \keyword{list} \keyword{utilities} collapse/man/fnth.Rd0000644000176200001440000002306214175334642014071 0ustar liggesusers\name{fnth} \alias{fnth} \alias{fnth.default} \alias{fnth.matrix} \alias{fnth.data.frame} \alias{fnth.grouped_df} \title{ Fast (Grouped, Weighted) N'th Element/Quantile for Matrix-Like Objects } \description{ \code{fnth} (column-wise) returns the n'th smallest element from a set of unsorted elements \code{x} corresponding to an integer index (\code{n}), or to a probability between 0 and 1. If \code{n} is passed as a probability, ties can be resolved using the lower, upper, or (default) average of the possible elements. These are discontinuous and fast methods to estimate a sample quantile. } \usage{ fnth(x, n = 0.5, \dots) \method{fnth}{default}(x, n = 0.5, g = NULL, w = NULL, TRA = NULL, na.rm = TRUE, use.g.names = TRUE, ties = "mean", \dots) \method{fnth}{matrix}(x, n = 0.5, g = NULL, w = NULL, TRA = NULL, na.rm = TRUE, use.g.names = TRUE, drop = TRUE, ties = "mean", \dots) \method{fnth}{data.frame}(x, n = 0.5, g = NULL, w = NULL, TRA = NULL, na.rm = TRUE, use.g.names = TRUE, drop = TRUE, ties = "mean", \dots) \method{fnth}{grouped_df}(x, n = 0.5, w = NULL, TRA = NULL, na.rm = TRUE, use.g.names = FALSE, keep.group_vars = TRUE, keep.w = TRUE, ties = "mean", \dots) } \arguments{ \item{x}{a numeric vector, matrix, data frame or grouped data frame (class 'grouped_df').} \item{n}{the element to return using a single integer index such that \code{1 < n < NROW(x)}, or a probability \code{0 < n < 1}. See Details. } \item{g}{a factor, \code{\link{GRP}} object, atomic vector (internally converted to factor) or a list of vectors / factors (internally converted to a \code{\link{GRP}} object) used to group \code{x}.} \item{w}{a numeric vector of (non-negative) weights, may contain missing values.} \item{TRA}{an integer or quoted operator indicating the transformation to perform: 1 - "replace_fill" | 2 - "replace" | 3 - "-" | 4 - "-+" | 5 - "/" | 6 - "\%" | 7 - "+" | 8 - "*" | 9 - "\%\%" | 10 - "-\%\%". See \code{\link{TRA}}.} \item{na.rm}{logical. Skip missing values in \code{x}. Defaults to \code{TRUE} and implemented at very little computational cost. If \code{na.rm = FALSE} a \code{NA} is returned when encountered.} \item{use.g.names}{logical. Make group-names and add to the result as names (default method) or row-names (matrix and data frame methods). No row-names are generated for \emph{data.table}'s.} \item{ties}{an integer or character string specifying the method to resolve ties between adjacent qualifying elements: \tabular{lllll}{\emph{ Int. } \tab\tab \emph{ String } \tab\tab \emph{ Description } \cr 1 \tab\tab "mean" \tab\tab take the arithmetic mean of all qualifying elements. \cr 2 \tab\tab "min" \tab\tab take the smallest of the elements. \cr 3 \tab\tab "max" \tab\tab take the largest of the elements. \cr } } \item{drop}{\emph{matrix and data.frame method:} Logical. \code{TRUE} drops dimensions and returns an atomic vector if \code{g = NULL} and \code{TRA = NULL}.} \item{keep.group_vars}{\emph{grouped_df method:} Logical. \code{FALSE} removes grouping variables after computation.} \item{keep.w}{\emph{grouped_df method:} Logical. Retain \code{sum} of weighting variable after computation (if contained in \code{grouped_df}).} \item{\dots}{arguments to be passed to or from other methods.} } \details{ This is an R port to \code{std::nth_element}, an efficient partial sorting algorithm in C++. It is also used to calculated the median (in fact the default \code{fnth(x, n = 0.5)} is identical to \code{fmedian(x)}, so see also the details for \code{\link{fmedian}}). \code{fnth} generalizes the principles of median value calculation to find arbitrary elements. It offers considerable flexibility by providing both simple order statistics and simple discontinuous quantile estimation. Regarding the former, setting \code{n} to an index between 1 and \code{NROW(x)} will return the n'th smallest element of \code{x}, about 2x faster than \code{sort(x, partial = n)[n]}. As to the latter, setting \code{n} to a probability between 0 and 1 will return the corresponding element of \code{x}, and resolve ties between multiple qualifying elements (such as when \code{n = 0.5} and \code{x} is even) using the arithmetic average \code{ties = "mean"}, or the smallest \code{ties = "min"} or largest \code{ties = "max"} of those elements. If \code{n > 1} is used and \code{x} contains missing values (and \code{na.rm = TRUE}, otherwise \code{NA} is returned), \code{n} is internally converted to a probability using \code{p = (n-1)/(NROW(x)-1)}, and that probability is applied to the set of complete elements (of each column if \code{x} is a matrix or data frame) to find the \code{as.integer(p*(fnobs(x)-1))+1L}'th element (which corresponds to option \code{ties = "min"}). Note that it is necessary to subtract and add 1 so that \code{n = 1} corresponds to \code{p = 0} and \code{n = NROW(x)} to \code{p = 1}. %So if \code{n > 1} is used in the presence of missing values, and the default \code{ties = "mean"} is enabled, the resulting element could be the average of two elements. When using grouped computations (supplying a vector or list to \code{g} subdividing \code{x}) and \code{n > 1} is used, it is transformed to a probability \code{p = (n-1)/(NROW(x)/ng-1)} (where \code{ng} contains the number of unique groups in \code{g}) and \code{ties = "min"} is used to sort out clashes. This could be useful for example to return the n'th smallest element of each group in a balanced panel, but with unequal group sizes it more intuitive to pass a probability to \code{n}. If weights are used, the same principles apply as for weighted median calculation: A target partial sum of weights \code{p*sum(w)} is calculated, and the weighted n'th element is the element k such that all elements smaller than k have a sum of weights \code{<= p*sum(w)}, and all elements larger than k have a sum of weights \code{<= (1 - p)*sum(w)}. If the partial-sum of weights (\code{p*sum(w)}) is reached exactly for some element k, then (summing from the lower end) both k and k+1 would qualify as the weighted n'th element (and some possible additional elements with zero weights following k would also qualify). If \code{n > 1}, the lowest of those elements is chosen (congruent with the unweighted behavior), %(ensuring that \code{fnth(x, n)}) and \code{fnth(x, n, w = rep(1, NROW(x)))}, always provide the same outcome) but if \code{0 < n < 1}, the \code{ties} option regulates how to resolve such conflicts, yielding lower-weighted, upper-weighted or (default) average weighted n'th elements. The weighted n'th element is computed using \code{\link{radixorder}} to first obtain an ordering of all elements, so it is considerably more computationally expensive than the unweighted version. With groups, the entire vector is also ordered, and the weighted n'th element is computed in a single ordered pass through the data (after calculating partial-group sums of the weights, skipping weights for which \code{x} is missing). If \code{x} is a matrix or data frame, these computations are performed independently for each column. Column-attributes and overall attributes of a data frame are preserved (if \code{g} is used or \code{drop = FALSE}). } \value{ The (\code{w} weighted) n'th element of \code{x}, grouped by \code{g}, or (if \code{\link{TRA}} is used) \code{x} transformed by its n'th element, grouped by \code{g}. } \seealso{ \code{\link{fmean}}, \code{\link{fmedian}}, \code{\link{fmode}}, \link[=fast-statistical-functions]{Fast Statistical Functions}, \link[=collapse-documentation]{Collapse Overview} } \examples{ ## default vector method mpg <- mtcars$mpg fnth(mpg) # Simple nth element: Median (same as fmedian(mpg)) fnth(mpg, 5) # 5th smallest element sort(mpg, partial = 5)[5] # Same using base R, fnth is 2x faster. fnth(mpg, 0.75) # Third quartile fnth(mpg, 0.75, w = mtcars$hp) # Weighted third quartile: Weighted by hp fnth(mpg, 0.75, TRA = "-") # Simple transformation: Subtract third quartile fnth(mpg, 0.75, mtcars$cyl) # Grouped third quartile fnth(mpg, 0.75, mtcars[c(2,8:9)]) # More groups.. g <- GRP(mtcars, ~ cyl + vs + am) # Precomputing groups gives more speed ! fnth(mpg, 0.75, g) fnth(mpg, 0.75, g, mtcars$hp) # Grouped weighted third quartile fnth(mpg, 0.75, g, TRA = "-") # Groupwise subtract third quartile fnth(mpg, 0.75, g, mtcars$hp, "-") # Groupwise subtract weighted third quartile ## data.frame method fnth(mtcars, 0.75) head(fnth(mtcars, 0.75, TRA = "-")) fnth(mtcars, 0.75, g) fnth(fgroup_by(mtcars, cyl, vs, am), 0.75) # Another way of doing it.. fnth(mtcars, 0.75, g, use.g.names = FALSE) # No row-names generated ## matrix method m <- qM(mtcars) fnth(m, 0.75) head(fnth(m, 0.75, TRA = "-")) fnth(m, 0.75, g) # etc.. \donttest{ % No code relying on suggested package library(dplyr) ## grouped_df method mtcars \%>\% group_by(cyl,vs,am) \%>\% fnth(0.75) mtcars \%>\% group_by(cyl,vs,am) \%>\% fnth(0.75, hp) # Weighted mtcars \%>\% fgroup_by(cyl,vs,am) \%>\% fnth(0.75) # Faster grouping! mtcars \%>\% fgroup_by(cyl,vs,am) \%>\% fnth(0.75, TRA = "/") # Divide by third quartile mtcars \%>\% fgroup_by(cyl,vs,am) \%>\% fselect(mpg, hp) \%>\% # Faster selecting fnth(0.75, hp, "/") # Divide mpg by its third weighted group-quartile, using hp as weights } } \keyword{univar} \keyword{manip} collapse/man/funique.Rd0000644000176200001440000000573214174223734014610 0ustar liggesusers\name{funique} \alias{funique} \alias{funique.default} \alias{funique.data.frame} \alias{funique.sf} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Fast Unique Elements / Rows } \description{ \code{funique} is a substantially faster alternative to \code{\link{unique}}. It is generic with a default vector and a data frame methods. } \usage{ funique(x, \dots) \method{funique}{default}(x, sort = FALSE, method = "auto", \dots) \method{funique}{data.frame}(x, cols = NULL, sort = FALSE, method = "auto", \dots) \method{funique}{sf}(x, cols = NULL, sort = FALSE, method = "auto", \dots) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{x}{a atomic vector or data frame / list of equal-length columns. } \item{sort}{logical. \code{TRUE} orders the unique elements / rows. \code{FALSE} returns unique values in order of first occurrence. } \item{method}{an integer or character string specifying the method of computation: \tabular{lllll}{\emph{ Int. } \tab\tab \emph{ String } \tab\tab \emph{ Description } \cr 1 \tab\tab "auto" \tab\tab automatic selection: hash if \code{sort = FALSE} else radix. \cr 2 \tab\tab "radix" \tab\tab use radix ordering to determine unique values. Supports \code{sort = FALSE} but only for character data. \cr 3 \tab\tab "hash" \tab\tab use index hashing to determine unique values. Supports \code{sort = TRUE} but only for atomic vectors (default method). \cr } } \item{cols}{compute unique rows according to a subset of columns. Columns can be selected using column names, indices, a logical vector or a selector function (e.g. \code{is.character}). \emph{Note:} All columns are returned. } \item{\dots}{arguments passed to \code{\link{radixorderv}}, e.g. \code{decreasing} or \code{na.last}. Only applicable if \code{method = "radix"}.} } \details{ If \code{x} is a data frame / list and all rows are already unique, then \code{x} is returned. Otherwise a copy of \code{x} with duplicate rows removed is returned. See \code{\link{group}} for some additional computational details. The \emph{sf} method simply ignores the geometry column when determining unique values. } \value{ \code{x} with duplicate elements/rows removed. % sorted in ascending order if \code{sort = TRUE}, and in order of first occurrence if \code{sort = FALSE}. } \seealso{ \code{\link{group}}, \link[=fast-grouping-ordering]{Fast Grouping and Ordering}, \link[=collapse-documentation]{Collapse Overview}. } \examples{ funique(mtcars$cyl) funique(gv(mtcars, c(2,8,9))) funique(mtcars, cols = c(2,8,9)) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory (show via RShowDoc("KEYWORDS")): \keyword{manip} % \keyword{ ~kwd2 } % Use only one keyword per line. % For non-standard keywords, use \concept instead of \keyword: % \concept{ ~cpt1 } % \concept{ ~cpt2 } % Use only one concept per line. collapse/man/fscale.Rd0000644000176200001440000002671014175334642014372 0ustar liggesusers\name{fscale} \alias{fscale} \alias{fscale.default} \alias{fscale.matrix} \alias{fscale.data.frame} \alias{fscale.pseries} \alias{fscale.pdata.frame} \alias{fscale.grouped_df} % \alias{standardize} \alias{STD} \alias{STD.default} \alias{STD.matrix} \alias{STD.data.frame} \alias{STD.pseries} \alias{STD.pdata.frame} \alias{STD.grouped_df} % - Also NEED an '\alias' for EACH other topic documented here. \title{ Fast (Grouped, Weighted) Scaling and Centering of Matrix-like Objects } \description{ \code{fscale} is a generic function to efficiently standardize (scale and center) data. \code{STD} is a wrapper around \code{fscale} representing the 'standardization operator', with more options than \code{fscale} when applied to matrices and data frames. Standardization can be simple or groupwise, ordinary or weighted. Arbitrary target means and standard deviations can be set, with special options for grouped scaling and centering. It is also possible to scale data without centering i.e. perform mean-preserving scaling. \emph{Note}: For centering without scaling see \code{\link[=fwithin]{fwithin/W}}. For simple not mean-preserving scaling use \code{\link[=fsd]{fsd(..., TRA = "/")}}. To sweep pre-computed means and scale-factors out of data see \code{\link{TRA}}. } \usage{ fscale(x, \dots) STD(x, \dots) \method{fscale}{default}(x, g = NULL, w = NULL, na.rm = TRUE, mean = 0, sd = 1, \dots) \method{STD}{default}(x, g = NULL, w = NULL, na.rm = TRUE, mean = 0, sd = 1, \dots) \method{fscale}{matrix}(x, g = NULL, w = NULL, na.rm = TRUE, mean = 0, sd = 1, \dots) \method{STD}{matrix}(x, g = NULL, w = NULL, na.rm = TRUE, mean = 0, sd = 1, stub = "STD.", \dots) \method{fscale}{data.frame}(x, g = NULL, w = NULL, na.rm = TRUE, mean = 0, sd = 1, \dots) \method{STD}{data.frame}(x, by = NULL, w = NULL, cols = is.numeric, na.rm = TRUE, mean = 0, sd = 1, stub = "STD.", keep.by = TRUE, keep.w = TRUE, \dots) # Methods for compatibility with plm: \method{fscale}{pseries}(x, effect = 1L, w = NULL, na.rm = TRUE, mean = 0, sd = 1, \dots) \method{STD}{pseries}(x, effect = 1L, w = NULL, na.rm = TRUE, mean = 0, sd = 1, \dots) \method{fscale}{pdata.frame}(x, effect = 1L, w = NULL, na.rm = TRUE, mean = 0, sd = 1, \dots) \method{STD}{pdata.frame}(x, effect = 1L, w = NULL, cols = is.numeric, na.rm = TRUE, mean = 0, sd = 1, stub = "STD.", keep.ids = TRUE, keep.w = TRUE, \dots) # Methods for grouped data frame / compatibility with dplyr: \method{fscale}{grouped_df}(x, w = NULL, na.rm = TRUE, mean = 0, sd = 1, keep.group_vars = TRUE, keep.w = TRUE, \dots) \method{STD}{grouped_df}(x, w = NULL, na.rm = TRUE, mean = 0, sd = 1, stub = "STD.", keep.group_vars = TRUE, keep.w = TRUE, \dots) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{x}{a numeric vector, matrix, data frame, panel series (\code{plm::pseries}), panel data frame (\code{plm::pdata.frame}) or grouped data frame (class 'grouped_df').} \item{g}{a factor, \code{\link{GRP}} object, atomic vector (internally converted to factor) or a list of vectors / factors (internally converted to a \code{\link{GRP}} object) used to group \code{x}.} \item{by}{\emph{STD data.frame method}: Same as \code{g}, but also allows one- or two-sided formulas i.e. \code{~ group1} or \code{var1 + var2 ~ group1 + group2}. See Examples.} \item{cols}{\emph{data.frame method}: Select columns to scale using a function, column names, indices or a logical vector. Default: All numeric variables. \emph{Note}: \code{cols} is ignored if a two-sided formula is passed to \code{by}.} \item{w}{a numeric vector of (non-negative) weights. \code{STD} data frame and \code{pdata.frame} methods also allow a one-sided formula i.e. \code{~ weightcol}. The \code{grouped_df} (\emph{dplyr}) method supports lazy-evaluation. See Examples.} \item{na.rm}{logical. Skip missing values in \code{x} or \code{w} when computing means and sd's.} \item{effect}{\emph{plm} methods: Select which panel identifier should be used as group-id. 1L takes the first variable in the \code{plm::index}, 2L the second etc.. Index variables can also be called by name using a character string. More than one variable can be supplied. } \item{stub}{a prefix or stub to rename all transformed columns. \code{FALSE} will not rename columns.} \item{mean}{the mean to center on (default is 0). If \code{mean = FALSE}, no centering will be performed. In that case the scaling is mean-preserving. A numeric value different from 0 (i.e. \code{mean = 5}) will be added to the data after subtracting out the mean(s), such that the data will have a mean of 5. A special option when performing grouped scaling and centering is \code{mean = "overall.mean"}. In that case the overall mean of the data will be added after subtracting out group means.} \item{sd}{the standard deviation to scale the data to (default is 1). A numeric value different from 0 (i.e. \code{sd = 3}) will scale the data to have a standard deviation of 3. A special option when performing grouped scaling is \code{sd = "within.sd"}. In that case the within standard deviation (= the standard deviation of the group-centered series) will be calculated and applied to each group. The results is that the variance of the data within each group is harmonized without forcing a certain variance (such as 1).} \item{keep.by, keep.ids, keep.group_vars}{\emph{data.frame, pdata.frame and grouped_df methods}: Logical. Retain grouping / panel-identifier columns in the output. For \code{STD.data.frame} this only works if grouping variables were passed in a formula.} \item{keep.w}{\emph{data.frame, pdata.frame and grouped_df methods}: Logical. Retain column containing the weights in the output. Only works if \code{w} is passed as formula / lazy-expression.} \item{\dots}{arguments to be passed to or from other methods.} } \details{ If \code{g = NULL}, \code{fscale} by default (column-wise) subtracts the mean or weighted mean (if \code{w} is supplied) from all data points in \code{x}, and then divides this difference by the standard deviation or frequency-weighted standard deviation (if \code{w} is supplied). The result is that all columns in \code{x} will have mean 0 and standard deviation 1. Alternatively, data can be scaled to have a mean of \code{mean} and a standard deviation of \code{sd}. If \code{mean = FALSE} the data is only scaled (not centered) such that the mean of the data is preserved. \cr Means and standard deviations are computed using Welford's numerically stable online algorithm. With groups supplied to \code{g}, this standardizing becomes groupwise, so that in each group (in each column) the data points will have mean \code{mean} and standard deviation \code{sd}. Naturally if \code{mean = FALSE} then each group is just scaled and the mean is preserved. For centering without scaling see \code{\link{fwithin}}. If \code{na.rm = FALSE} and a \code{NA} or \code{NaN} is encountered, the mean and sd for that group will be \code{NA}, and all data points belonging to that group will also be \code{NA} in the output. If \code{na.rm = TRUE}, means and sd's are computed (column-wise) on the available data points, and also the weight vector can have missing values. In that case, the weighted mean an sd are computed on (column-wise) \code{complete.cases(x, w)}, and \code{x} is scaled using these statistics. \emph{Note} that \code{fscale} will not insert a missing value in \code{x} if the weight for that value is missing, rather, that value will be scaled using a weighted mean and standard-deviated computed without itself! (The intention here is that a few (randomly) missing weights shouldn't break the computation when \code{na.rm = TRUE}, but it is not meant for weight vectors with many missing values. If you don't like this behavior, you should prepare your data using \code{x[is.na(w), ] <- NA}, or impute your weight vector for non-missing \code{x}). Special options for grouped scaling are \code{mean = "overall.mean"} and \code{sd = "within.sd"}. The former group-centers vectors on the overall mean of the data (see \code{\link{fwithin}} for more details) and the latter scales the data in each group to have the within-group standard deviation (= the standard deviation of the group-centered data). Thus scaling a grouped vector with options \code{mean = "overall.mean"} and \code{sd = "within.sd"} amounts to removing all differences in the mean and standard deviations between these groups. In weighted computations, \code{mean = "overall.mean"} will subtract weighted group-means from the data and add the overall weighted mean of the data, whereas \code{sd = "within.sd"} will compute the weighted within- standard deviation and apply it to each group. } \value{ \code{x} standardized (mean = mean, standard deviation = sd), grouped by \code{g/by}, weighted with \code{w}. See Details. } % \references{ %% ~put references to the literature/web site here ~ % } % \author{ %% ~~who you are~~ % } % \note{ %% ~~further notes~~ % } %% ~Make other sections like Warning with \section{Warning }{\dots.} ~ \seealso{ \code{\link{fwithin}}, \code{\link{fsd}}, \code{\link{TRA}}, \link[=fast-statistical-functions]{Fast Statistical Functions}, \link[=data-transformations]{Data Transformations}, \link[=collapse-documentation]{Collapse Overview} } \examples{ ## Simple Scaling & Centering / Standardizing head(fscale(mtcars)) # Doesn't rename columns head(STD(mtcars)) # By default adds a prefix qsu(STD(mtcars)) # See that is works qsu(STD(mtcars, mean = 5, sd = 3)) # Assigning a mean of 5 and a standard deviation of 3 qsu(STD(mtcars, mean = FALSE)) # No centering: Scaling is mean-preserving ## Panel Data head(fscale(get_vars(wlddev,9:12), wlddev$iso3c)) # Standardizing 4 series within each country head(STD(wlddev, ~iso3c, cols = 9:12)) # Same thing using STD, id's added pwcor(fscale(get_vars(wlddev,9:12), wlddev$iso3c)) # Correlaing panel series after standardizing fmean(get_vars(wlddev, 9:12)) # This calculates the overall means fsd(fwithin(get_vars(wlddev, 9:12), wlddev$iso3c)) # This calculates the within standard deviations head(qsu(fscale(get_vars(wlddev, 9:12), # This group-centers on the overall mean and wlddev$iso3c, # group-scales to the within standard deviation mean = "overall.mean", sd = "within.sd"), # -> data harmonized in the first 2 moments by = wlddev$iso3c)) \donttest{ % No code relying on suggested package ## Using plm pwlddev <- plm::pdata.frame(wlddev, index = c("iso3c","year")) head(STD(pwlddev)) # Standardizing all numeric variables by country head(STD(pwlddev, effect = 2L)) # Standardizing all numeric variables by year ## Weighted Standardizing weights = abs(rnorm(nrow(wlddev))) head(fscale(get_vars(wlddev,9:12), wlddev$iso3c, weights)) head(STD(wlddev, ~iso3c, weights, 9:12)) # Using dplyr library(dplyr) wlddev \%>\% group_by(iso3c) \%>\% select(PCGDP,LIFEEX) \%>\% STD() wlddev \%>\% group_by(iso3c) \%>\% select(PCGDP,LIFEEX) \%>\% STD(weights) # weighted standardizing wlddev \%>\% group_by(iso3c) \%>\% select(PCGDP,LIFEEX,POP) \%>\% STD(POP) # weighting by POP -> # ..keeps the weight column unless keep.w = FALSE } } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{manip} % __ONLY ONE__ keyword per line % use one of RShowDoc("KEYWORDS") collapse/man/fFtest.Rd0000644000176200001440000001200414167156464014364 0ustar liggesusers\name{fFtest} \alias{fFtest} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Fast (Weighted) F-test for Linear Models (with Factors) } \description{ \code{fFtest} computes an R-squared based F-test for the exclusion of the variables in \code{exc}, where the full (unrestricted) model is defined by variables supplied to both \code{exc} and \code{X}. The test is efficient and designed for cases where both \code{exc} and \code{X} may contain multiple factors and continuous variables. } \usage{ fFtest(y, exc, X = NULL, w = NULL, full.df = TRUE, \dots) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{y}{a numeric vector: The dependent variable.} \item{exc}{a numeric vector, factor, numeric matrix or list / data frame of numeric vectors and/or factors: Variables to test / exclude.} \item{X}{a numeric vector, factor, numeric matrix or list / data frame of numeric vectors and/or factors: Covariates to include in both the restricted (without \code{exc}) and unrestricted model. If left empty (\code{X = NULL}), the test amounts to the F-test of the regression of \code{y} on \code{exc}.} \item{w}{numeric. A vector of (frequency) weights.} \item{full.df}{logical. If \code{TRUE} (default), the degrees of freedom are calculated as if both restricted and unrestricted models were estimated using \code{lm()} (i.e. as if factors were expanded to matrices of dummies). \code{FALSE} only uses one degree of freedom per factor. } \item{\dots}{other arguments passed to \code{fhdwithin}. Sensible options might be the \code{lm.method} argument or further control parameters to \code{fixest::demean}, the workhorse function underlying \code{fhdwithin} for higher-order centering tasks. } } \details{ Factors and continuous regressors are efficiently projected out using \code{\link{fhdwithin}}, and the option \code{full.df} regulates whether a degree of freedom is subtracted for each used factor level (equivalent to dummy-variable estimator / expanding factors), or only one degree of freedom per factor (treating factors as variables). The test automatically removes missing values and considers only the complete cases of \code{y, exc} and \code{X}. Unused factor levels in \code{exc} and \code{X} are dropped. \emph{Note} that an intercept is always added by \code{\link{fhdwithin}}, so it is not necessary to include an intercept in data supplied to \code{exc} / \code{X}. } \value{ A 5 x 3 numeric matrix of statistics. The columns contain statistics: \enumerate{ \item the R-squared of the model \item the numerator degrees of freedom i.e. the number of variables (k) and used factor levels if \code{full.df = TRUE} \item the denominator degrees of freedom: N - k - 1. \item the F-statistic \item the corresponding P-value } The rows show these statistics for: \enumerate{ \item the Full (unrestricted) Model (\code{y ~ exc + X}) \item the Restricted Model (\code{y ~ X}) \item the Exclusion Restriction of \code{exc}. The R-squared shown is simply the difference of the full and restricted R-Squared's, not the R-Squared of the model \code{y ~ exc}. } If \code{X = NULL}, only a vector of the same 5 statistics testing the model (\code{y ~ exc}) is shown. %% ~Describe the value returned %% If it is a LIST, use %% \item{comp1 }{Description of 'comp1'} %% \item{comp2 }{Description of 'comp2'} %% \dots } %% ~Make other sections like Warning with \section{Warning }{\dots.} ~ \seealso{ \code{\link{flm}}, \code{\link{fhdwithin}}, \link[=data-transformations]{Data Transformations}, \link[=collapse-documentation]{Collapse Overview} } \examples{ ## We could use fFtest as a seasonality test: fFtest(AirPassengers, qF(cycle(AirPassengers))) # Testing for level-seasonality fFtest(AirPassengers, qF(cycle(AirPassengers)), # Seasonality test around a cubic trend poly(seq_along(AirPassengers), 3)) fFtest(fdiff(AirPassengers), qF(cycle(AirPassengers))) # Seasonality in first-difference ## A more classical example with only continuous variables fFtest(mtcars$mpg, mtcars[c("cyl","vs")], mtcars[c("hp","carb")]) \donttest{ % requires fixest package ## Now encoding cyl and vs as factors fFtest(mtcars$mpg, dapply(mtcars[c("cyl","vs")], qF), mtcars[c("hp","carb")]) } ## Using iris data: A factor and a continuous variable excluded fFtest(iris$Sepal.Length, iris[4:5], iris[2:3]) ## Testing the significance of country-FE in regression of GDP on life expectancy fFtest(wlddev$PCGDP, wlddev$iso3c, wlddev$LIFEEX) \donttest{ % requires fixest package ## Ok, country-FE are significant, what about adding time-FE fFtest(wlddev$PCGDP, qF(wlddev$year), wlddev[c("iso3c","LIFEEX")]) } # Same test done using lm: data <- na_omit(get_vars(wlddev, c("iso3c","year","PCGDP","LIFEEX"))) full <- lm(PCGDP ~ LIFEEX + iso3c + qF(year), data) rest <- lm(PCGDP ~ LIFEEX + iso3c, data) anova(rest, full) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{htest} % use one of RShowDoc("KEYWORDS") collapse/man/programming.Rd0000644000176200001440000002425514176642305015460 0ustar liggesusers\name{efficient-programming} \alias{AA2-efficient-programming} \alias{efficient-programming} \alias{anyv} \alias{allv} \alias{allNA} \alias{whichv} \alias{whichNA} \alias{alloc} \alias{copyv} \alias{setv} \alias{setop} \alias{\%==\%} \alias{\%!=\%} \alias{\%+=\%} \alias{\%-=\%} \alias{\%*=\%} \alias{\%/=\%} \alias{cinv} \alias{vlengths} \alias{vtypes} \alias{fnlevels} \alias{fnrow} \alias{fncol} \alias{fdim} \alias{missing_cases} \alias{na_rm} \alias{na_omit} \alias{na_insert} \alias{seq_row} \alias{seq_col} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Small Functions to Make R Programming More Efficient } \description{ A small set of functions to addresses some common inefficiencies in R, such as the creation of logical vectors to compare quantities, unnecessary copies of objects in elementary mathematical or subsetting operations, obtaining information about objects (esp. data frames), or dealing with missing values. % It makes sense to use them when dealing with > 10,000 obs. on modern computers. } \usage{ anyv(x, value) # Faster than any(x == value) allv(x, value) # Faster than all(x == value) allNA(x) # Faster than all(is.na(x)) whichv(x, value, # Faster than which(x == value) invert = FALSE) # or which(x != value) whichNA(x, invert = FALSE) # Faster than which((!)is.na(x)) x \%==\% value # Infix for whichv(v, value, FALSE), use e.g. in fsubset x \%!=\% value # Infix for whichv(v, value, TRUE) alloc(value, n) # Faster than rep_len(value, n) copyv(X, v, R, \dots, invert # Faster than replace(x, x == v, r) or replace(x, v, r[v]) = FALSE, vind1 = FALSE) # or replace(x, x != v, r) or replace(x, !v, r[!v]) setv(X, v, R, \dots, invert # Same for x[x (!/=)= v] <- r or x[(!)v] <- r[(!)v] = FALSE, vind1 = FALSE) # modifies x by reference, fastest setop(X, op, V, \dots, # Faster than X <- X +\-\*\/ V (modifies by reference) rowwise = FALSE) # optionally can also add v to rows of a matrix X \%+=\% V # Infix for setop(X, "+", V) X \%-=\% V # Infix for setop(X, "-", V) X \%*=\% V # Infix for setop(X, "*", V) X \%/=\% V # Infix for setop(X, "/", V) na_rm(x) # Fast: if(anyNA(x)) x[!is.na(x)] else x, # also removes NULL / empty elements from list na_omit(X, cols = NULL, # Faster na.omit for matrices and data frames, na.attr = FALSE) # can use selected columns and attach indices na_insert(X, prop = 0.1, # Insert missing values at random value = NA) missing_cases(X, # The oposite of complete.cases(), faster for cols = NULL) # data frames vlengths(X, use.names=TRUE) # Faster version of lengths() (in C, no method dispatch) vtypes(X, use.names = TRUE) # Get data storage types (faster vapply(X, typeof, ...)) fnlevels(x) # Faster version of nlevels(x) (for factors) fnrow(X) # Faster nrow for data frames (not faster for matrices) fncol(X) # Faster ncol for data frames (not faster for matrices) fdim(X) # Faster dim for data frames (not faster for matrices) seq_row(X) # Fast integer sequences along rows of X seq_col(X) # Fast integer sequences along columns of X cinv(x) # Choleski (fast) inverse of symmetric PD matrix, e.g. X'X } %- maybe also 'usage' for other objects documented here. \arguments{ \item{X, V, R}{a vector, matrix or data frame.} \item{x, v}{a (atomic) vector or matrix (\code{na_rm} also supports lists).} \item{value}{a single value of any (atomic) vector type. } \item{invert}{logical. \code{TRUE} considers elements \code{x != value}.} \item{vind1}{logical. If \code{length(v) == 1L}, setting \code{vind1 = TRUE} will interpret \code{v} as an index of \code{X} and \code{R}, rather than a value to search and replace.} \item{op}{an integer or character string indicating the operation to perform. \tabular{lllll}{\emph{ Int. } \tab\tab \emph{ String } \tab\tab \emph{ Description } \cr 1 \tab\tab \code{"+"} \tab\tab add \code{V} \cr 2 \tab\tab \code{"-"} \tab\tab subtract \code{V} \cr 3 \tab\tab \code{"*"} \tab\tab multiply by \code{V} \cr 4 \tab\tab \code{"/"} \tab\tab divide by \code{V} \cr } } \item{rowwise}{logical. \code{TRUE} performs the operation between \code{v} and each row of \code{x}. Only applicable if \code{x} is a matrix and \code{v} a vector such that \code{length(v) == ncol(x)}.} \item{cols}{select columns to check for missing values using column names, indices, a logical vector or a function (e.g. \code{is.numeric}). The default is to check all columns, which could be inefficient.} \item{n}{integer. The length of the vector to allocate with \code{value}.} \item{na.attr}{logical. \code{TRUE} adds an attribute containing the removed cases. For compatibility reasons this is exactly the same format as \code{na.omit} i.e. the attribute is called "na.action" and of class "omit".} \item{prop}{double. Specify the proportion of observations randomly replaced with \code{NA}.} \item{use.names}{logical. Preserve names if \code{X} is a list. } \item{\dots}{not used, reserved for possible future arguments.} } \details{ \code{copyv} and \code{setv} are designed to optimize operations that require replacing a single value in an object e.g. \code{X[X == value] <- r} or \code{X[X == value] <- R[R == value]} or simply copying parts of an existing object into another object e.g. \code{X[v] <- R[v]}. Thus they only cover cases where base R is inefficient by either creating a logical vector or materializing a subset to do some replacement. No alternative is provided in cases where base R is efficient e.g. \code{x[v] <- r} or cases provided by \code{\link[data.table]{set}} and \code{\link[data.table]{copy}} from the \emph{data.table} package. Both functions work equivalently, with the difference that \code{copyv} creates a deep copy of the data before making the replacements and returns the copy, whereas \code{setv} modifies the data directly without creating a copy and returns the modified object invisibly. Thus \code{setv} is considerably more efficient. \code{copyv} and \code{setv} perform different tasks, depending on the input. If \code{v} is a scalar, the elements of \code{X} are compared to \code{v}, and the matching ones (or non-matching ones if \code{invert = TRUE}) are replaced with \code{R}, where \code{R} can be either a scalar or an object of the same dimensions as \code{X}. If \code{X} is a data frame, \code{R} can also be a column-vector matching \code{fnrow(X)}. The second option is if \code{v} is either a logical or integer vector of indices with \code{length(v) > 1L}, indicating the elements of a vector / matrix (or rows if \code{X} is a data frame) to replace with corresponding elements from \code{R}. Thus \code{R} has to be of equal dimensions as \code{X}, but could also be a column-vector if \code{X} is a data frame. Setting \code{vind1 = TRUE} ensures that \code{v} is always interpreted as an index, even if \code{length(v) == 1L}. % In this case \code{r} has to be a vector of the same length as \code{x}, and the corresponding elements in \code{v} are replaced with their counterparts in \code{r}. \code{copyv} does all that by first creating a copy of \code{x}, whereas \code{setv} modifies \code{x} directly and is thus more efficient. } \note{ (1) None of these functions currently support complex vectors. (2) It is possible to compare factors by the levels (e.g. \code{iris$Species \%==\% "setosa")}) or using integers (\code{iris$Species \%==\% 1L}). The latter is slightly more efficient. (3) Nothing special is implemented for other objects apart from basic types, e.g. for dates (which are stored as doubles) you need to generate a date object e.g. \code{wlddev$date \%==\% as.Date("2019-01-01")}, \code{wlddev$date \%==\% "2019-01-01"} will give \code{integer(0)}. } %% ~Make other sections like Warning with \section{Warning }{....} ~ \seealso{ \link[=data-transformations]{Data Transformations}, \link[=small-helpers]{Small (Helper) Functions}, \link[=collapse-documentation]{Collapse Overview} } \examples{ ## Which value whichNA(wlddev$PCGDP) # Same as which(is.na(wlddev$PCGDP)) whichNA(wlddev$PCGDP, invert = TRUE) # Same as which(!is.na(wlddev$PCGDP)) whichv(wlddev$country, "Chad") # Same as which(wlddev$county == "Chad") wlddev$country \%==\% "Chad" # Same thing whichv(wlddev$country, "Chad", TRUE) # Same as which(wlddev$county != "Chad") wlddev$country \%!=\% "Chad" # Same thing lvec <- wlddev$country == "Chad" # If we already have a logical vector... whichv(lvec, FALSE) # is fastver than which(!lvec) rm(lvec) # Using the \%==\% operator can yield tangible performance gains fsubset(wlddev, iso3c \%==\% "DEU") # 3x faster than: fsubset(wlddev, iso3c == "DEU") ## Missing values mtc_na <- na_insert(mtcars, 0.15) # Set 15\% of values missing at random fnobs(mtc_na) # See observation count na_omit(mtc_na) # 12x faster than na.omit(mtc_na) na_omit(mtc_na, na.attr = TRUE) # Adds attribute with removed cases, like na.omit na_omit(mtc_na, cols = c("vs","am")) # Removes only cases missing vs or am na_omit(qM(mtc_na)) # Also works for matrices na_omit(mtc_na$vs, na.attr = TRUE) # Also works with vectors na_rm(mtc_na$vs) # For vectors na_rm is faster ... rm(mtc_na) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory (show via RShowDoc("KEYWORDS")): % \keyword{ ~kwd1 } % \keyword{ ~kwd2 } % Use only one keyword per line. % For non-standard keywords, use \concept instead of \keyword: % \concept{ ~cpt1 } % \concept{ ~cpt2 } % Use only one concept per line. \keyword{utilities} % __ONLY ONE__ keyword per line % use one of RShowDoc("KEYWORDS") \keyword{manip} \keyword{math} \keyword{documentation} collapse/man/qsu.Rd0000644000176200001440000003572714174223734013753 0ustar liggesusers\name{qsu} \alias{qsu} \alias{qsu.default} \alias{qsu.matrix} \alias{qsu.data.frame} \alias{qsu.pseries} \alias{qsu.pdata.frame} \alias{qsu.sf} \alias{print.qsu} % - Also NEED an '\alias' for EACH other topic documented here. \title{ Fast (Grouped, Weighted) Summary Statistics for Cross-Sectional and Panel Data } \description{ \code{qsu}, shorthand for quick-summary, is an extremely fast summary command inspired by the (xt)summarize command in the STATA statistical software. It computes a set of 7 statistics (nobs, mean, sd, min, max, skewness and kurtosis) using a numerically stable one-pass method generalized from Welford's Algorithm. Statistics can be computed weighted, by groups, and also within-and between entities (for panel data, see Details). } \usage{ qsu(x, \dots) \method{qsu}{default}(x, g = NULL, pid = NULL, w = NULL, higher = FALSE, array = TRUE, stable.algo = TRUE, \dots) \method{qsu}{matrix}(x, g = NULL, pid = NULL, w = NULL, higher = FALSE, array = TRUE, stable.algo = TRUE, \dots) \method{qsu}{data.frame}(x, by = NULL, pid = NULL, w = NULL, cols = NULL, higher = FALSE, array = TRUE, vlabels = FALSE, stable.algo = TRUE, \dots) # Methods for compatibility with plm: \method{qsu}{pseries}(x, g = NULL, w = NULL, effect = 1L, higher = FALSE, array = TRUE, stable.algo = TRUE, \dots) \method{qsu}{pdata.frame}(x, by = NULL, w = NULL, cols = NULL, effect = 1L, higher = FALSE, array = TRUE, vlabels = FALSE, stable.algo = TRUE, \dots) # Methods for compatibility with sf: \method{qsu}{sf}(x, by = NULL, pid = NULL, w = NULL, cols = NULL, higher = FALSE, array = TRUE, vlabels = FALSE, stable.algo = TRUE, \dots) \method{print}{qsu}(x, digits = 4, nonsci.digits = 9, na.print = "-", return = FALSE, print.gap = 2, \dots) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{x}{a vector, matrix, data frame, panel series (\code{plm::pseries}) or panel data frame (\code{plm::pdata.frame}).} \item{g}{a factor, \code{\link{GRP}} object, atomic vector (internally converted to factor) or a list of vectors / factors (internally converted to a \code{\link{GRP}} object) used to group \code{x}.} \item{by}{\emph{(p)data.frame method}: Same as \code{g}, but also allows one- or two-sided formulas i.e. \code{~ group1 + group2} or \code{var1 + var2 ~ group1 + group2}. See Examples.} \item{pid}{same input as \code{g/by}: Specify a panel-identifier to also compute statistics on between- and within- transformed data. Data frame method also supports one- or two-sided formulas. Transformations are taken independently from grouping with \code{g/by} (grouped statistics are computed on the transformed data if \code{g/by} is also used). However, passing any LHS variables to \code{pid} will overwrite any \code{LHS} variables passed to \code{by}.} \item{w}{a vector of (non-negative) weights. Adding weights will compute the weighted mean, sd, skewness and kurtosis, and transform the data using weighted individual means if \code{pid} is used.} \item{cols}{select columns to summarize using column names, indices, a logical vector or a function (e.g. \code{is.numeric}). Two-sided formulas passed to \code{by} or \code{pid} overwrite \code{cols}.} \item{higher}{logical. Add higher moments (skewness and kurtosis).} \item{array}{logical. If computations have more than 2 dimensions (up to a maximum of 4D: variables, statistics, groups and panel-decomposition) output to array, else output (nested) list of matrices.} \item{stable.algo}{logical. \code{FALSE} uses a faster but less stable method to calculate the standard deviation (see Details of \code{\link{fsd}}). Only available if \code{w = NULL} and \code{higher = FALSE}.} \item{vlabels}{logical. Use variable labels in the summary. See \code{\link{vlabels}}.} \item{effect}{\emph{plm} methods: Select which panel identifier should be used for between and within transformations of the data. 1L takes the first variable in the \code{plm::index}, 2L the second etc.. Index variables can also be called by name using a character string. More than one variable can be supplied. } \item{\dots}{arguments to be passed to or from other methods.} \item{digits}{the number of digits to print after the comma/dot.} \item{nonsci.digits}{the number of digits to print before resorting to scientific notation (default is to print out numbers with up to 9 digits and print larger numbers scientifically).} \item{na.print}{character string to substitute for missing values.} \item{return}{logical. Don't print but instead return the formatted object.} \item{print.gap}{integer. Spacing between printed columns. Passed to \code{print.default}.} } \details{ The algorithm used to compute statistics is well described \href{https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance}{here} [see sections \emph{Welford's online algorithm}, \emph{Weighted incremental algorithm} and \emph{Higher-order statistics}. Skewness and kurtosis are calculated as described in \emph{Higher-order statistics} and are mathematically identical to those implemented in the \emph{moments} package. Just note that \code{qsu} computes the kurtosis (like \code{momens::kurtosis}), not the excess-kurtosis (= kurtosis - 3) defined in \emph{Higher-order statistics}. The \emph{Weighted incremental algorithm} described can easily be generalized to higher-order statistics]. Grouped computations specified with \code{g/by} are carried out extremely efficiently as in \code{fsum} (in a single pass, without splitting the data). If \code{pid} is used, \code{qsu} performs a panel-decomposition of each variable and computes 3 sets of statistics: Statistics computed on the 'Overall' (raw) data, statistics computed on the 'Between' - transformed (pid - averaged) data, and statistics computed on the 'Within' - transformed (pid - demeaned) data. More formally, let \bold{\code{x}} (bold) be a panel vector of data for \code{N} individuals indexed by \code{i}, recorded for \code{T} periods, indexed by \code{t}. \code{xit} then denotes a single data-point belonging to individual \code{i} in time-period \code{t} (\code{t/T} must not represent time). Then \code{xi.} denotes the average of all values for individual \code{i} (averaged over \code{t}), and by extension \bold{\code{xN.}} is the vector (length \code{N}) of such averages for all individuals. If no groups are supplied to \code{g/by}, the 'Between' statistics are computed on \bold{\code{xN.}}, the vector of individual averages. (This means that for a non-balanced panel or in the presence of missing values, the 'Overall' mean computed on \bold{\code{x}} can be slightly different than the 'Between' mean computed on \bold{\code{xN.}}, and the variance decomposition is not exact). If groups are supplied to \code{g/by}, \bold{\code{xN.}} is expanded to the vector \bold{\code{xi.}} (length \code{N x T}) by replacing each value \code{xit} in \bold{\code{x}} with \code{xi.}, while preserving missing values in \bold{\code{x}}. Grouped Between-statistics are then computed on \bold{\code{xi.}}, with the only difference that the number of observations ('Between-N') reported for each group is the number of distinct non-missing values of \bold{\code{xi.}} in each group (not the total number of non-missing values of \bold{\code{xi.}} in each group, which is already reported in 'Overall-N'). See Examples. 'Within' statistics are always computed on the vector \bold{\code{x - xi. + x..}}, where \bold{\code{x..}} is simply the 'Overall' mean computed from \bold{\code{x}}, which is added back to preserve the level of the data. The 'Within' mean computed on this data will always be identical to the 'Overall' mean. In the summary output, \code{qsu} reports not 'N', which would be identical to the 'Overall-N', but 'T', the average number of time-periods of data available for each individual obtained as 'T' = 'Overall-N / 'Between-N'. See Examples. Apart from 'N/T' and the extrema, the standard-deviations ('SD') computed on between- and within- transformed data are extremely valuable because they indicate how much of the variation in a panel-variable is between-individuals and how much of the variation is within-individuals (over time). At the extremes, variables that have common values across individuals (such as the time-variable(s) 't' in a balanced panel), can readily be identified as individual-invariant because the 'Between-SD' on this variable is 0 and the 'Within-SD' is equal to the 'Overall-SD'. Analogous, time-invariant individual characteristics (such as the individual-id 'i') have a 0 'Within-SD' and a 'Between-SD' equal to the 'Overall-SD'. See Examples. \code{qsu} comes with it's own print method which by default writes out up to 9 digits at 4 decimal places. Larger numbers are printed in scientific format. for numbers between 7 and 9 digits, an apostrophe (') is placed after the 6th digit to designate the millions. Missing values are printed using '-'. The \emph{sf} method simply ignores the geometry column. } \value{ A vector, matrix, array or list of matrices of summary statistics. All matrices and arrays have a class 'qsu' and a class 'table' attached. } \note{ In weighted summaries, observations with missing or zero weights are skipped, and thus do not affect any of the calculated statistics, including the observation count. This also implies that a logical vector passed to \code{w} can be used to efficiently summarize a subset of the data. } \references{ Welford, B. P. (1962). Note on a method for calculating corrected sums of squares and products. \emph{Technometrics}. 4 (3): 419-420. doi:10.2307/1266577. } % \author{ %% ~~who you are~~ % } \note{ If weights \code{w} are used together with \code{pid}, transformed data is computed using weighted individual means i.e. weighted \bold{\code{xi.}} and weighted \bold{\code{x..}}. Weighted statistics are subsequently computed on this weighted-transformed data. } %% ~Make other sections like Warning with \section{Warning }{\dots.} ~ \seealso{ \code{\link{descr}}, \link[=summary-statistics]{Summary Statistics}, \link[=fast-statistical-functions]{Fast Statistical Functions}, \link[=collapse-documentation]{Collapse Overview} %% ~~objects to See Also as \code{\link{help}}, ~~~ } \examples{ ## World Development Panel Data # Simple Summaries ------------------------- qsu(wlddev) # Simple summary qsu(wlddev, vlabels = TRUE) # Display variable labels qsu(wlddev, higher = TRUE) # Add skewness and kurtosis # Grouped Summaries ------------------------ qsu(wlddev, ~ region, vlabels = TRUE) # Statistics by World Bank Region qsu(wlddev, PCGDP + LIFEEX ~ income) # Summarize GDP per Capita and Life Expectancy by stats <- qsu(wlddev, ~ region + income, # World Bank Income Level cols = 9:10, higher = TRUE) # Same variables, by both region and income aperm(stats) # A different perspective on the same stats # Panel Data Summaries --------------------- qsu(wlddev, pid = ~ iso3c, vlabels = TRUE) # Adding between and within countries statistics # -> They show amongst other things that year and decade are individual-invariant, # that we have GINI-data on only 161 countries, with only 8.42 observations per country on average, # and that GDP, LIFEEX and GINI vary more between-countries, but ODA received varies more within # countries over time. # Let's do this manually for PCGDP: x <- wlddev$PCGDP g <- wlddev$iso3c # This is the exact variance decomposion all.equal(fvar(x), fvar(B(x, g)) + fvar(W(x, g))) # What qsu does is calculate r <- rbind(Overall = qsu(x), Between = qsu(fmean(x, g)), # Aggregation instead of between-transform Within = qsu(fwithin(x, g, mean = "overall.mean"))) # Same as qsu(W(x, g) + fmean(x)) r[3, 1] <- r[1, 1] / r[2, 1] print.qsu(r) # Proof: qsu(x, pid = g) \donttest{ % No code relying on suggested package # Using plm: pwlddev <- plm::pdata.frame(wlddev, # Creating a Panel Data Frame frame from this data index = c("iso3c","year")) qsu(pwlddev) # Summary for pdata.frame -> qsu(wlddev, pid = ~ iso3c) qsu(pwlddev$PCGDP) # Default summary for Panel Series (class pseries) qsu(G(pwlddev$PCGDP)) # Summarizing GDP growth, see also ?G # Grouped Panel Data Summaries ------------- qsu(wlddev, ~ region, ~ iso3c, cols = 9:12) # Panel-Statistics by region psr <- qsu(pwlddev, ~ region, cols = 9:12) # Same on plm pdata.frame psr # -> Gives a 4D array psr[,"N/T",,] # Checking out the number of observations: # In North america we only have 3 countries, for the GINI we only have 3.91 observations on average # for 45 Sub-Saharan-African countries, etc.. psr[,"SD",,] # Considering only standard deviations # -> In all regions variations in inequality (GINI) between countries are greater than variations # in inequality within countries. The opposite is true for Life-Expectancy in all regions apart # from Europe, etc.. } # Again let's do this manually for PDGCP: d <- cbind(Overall = x, Between = fbetween(x, g), Within = fwithin(x, g, mean = "overall.mean")) r <- qsu(d, g = wlddev$region) r[,"N","Between"] <- fndistinct(g[!is.na(x)], wlddev$region[!is.na(x)]) r[,"N","Within"] <- r[,"N","Overall"] / r[,"N","Between"] r # Proof: qsu(wlddev, PCGDP ~ region, ~ iso3c) # Same as above, but output as nested list psrl <- qsu(wlddev, ~ region, ~ iso3c, cols = 9:12, array = FALSE) psrl # We can use unlist2d to create a tidy data.frame head(unlist2d(psrl, c("Variable","Trans"), row.names = "Region")) # Weighted Summaries ----------------------- n <- nrow(wlddev) weights <- abs(rnorm(n)) # Generate random weights qsu(wlddev, w = weights, higher = TRUE) # Computed weighted mean, SD, skewness and kurtosis weightsNA <- weights # Weights may contain missing values.. inserting 1000 weightsNA[sample.int(n, 1000)] <- NA qsu(wlddev, w = weightsNA, higher = TRUE) # But now these values are removed from all variables # Grouped and panel-summaries can also be weighted in the same manor } % View(psrdat) % # We've gotten this far, let's give it a ggplot2 finish: % psrdat <- reshape2::melt(psrdat, 1:3, % variable.name = "Statistic") # Looks freakin rediculous, but still a nice demonstation % library(ggplot2) % ggplot(psrdat, aes(x = Trans, y = value, fill = Region)) + % geom_bar(stat = "identity", position = position_dodge()) + % facet_wrap(Statistic ~ Variable, scales = "free", ncol = 4) % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{univar} % __ONLY ONE__ keyword per line % use one of RShowDoc("KEYWORDS") collapse/man/fmode.Rd0000644000176200001440000002124114174223734014217 0ustar liggesusers\name{fmode} \alias{fmode} \alias{fmode.default} \alias{fmode.matrix} \alias{fmode.data.frame} \alias{fmode.grouped_df} \title{Fast (Grouped, Weighted) Statistical Mode for Matrix-Like Objects} % Vectors, Matrix and Data Frame Columns} \description{ \code{fmode} is a generic function and returns the (column-wise) statistical mode i.e. the most frequent value of \code{x}, (optionally) grouped by \code{g} and/or weighted by \code{w}. The \code{\link{TRA}} argument can further be used to transform \code{x} using its (grouped, weighted) mode. Ties between multiple possible modes can be resolved by taking the minimum, maximum, (default) first or last occurring mode. } \usage{ fmode(x, \dots) \method{fmode}{default}(x, g = NULL, w = NULL, TRA = NULL, na.rm = TRUE, use.g.names = TRUE, ties = "first", \dots) \method{fmode}{matrix}(x, g = NULL, w = NULL, TRA = NULL, na.rm = TRUE, use.g.names = TRUE, drop = TRUE, ties = "first", \dots) \method{fmode}{data.frame}(x, g = NULL, w = NULL, TRA = NULL, na.rm = TRUE, use.g.names = TRUE, drop = TRUE, ties = "first", \dots) \method{fmode}{grouped_df}(x, w = NULL, TRA = NULL, na.rm = TRUE, use.g.names = FALSE, keep.group_vars = TRUE, keep.w = TRUE, ties = "first", \dots) } \arguments{ \item{x}{a vector, matrix, data frame or grouped data frame (class 'grouped_df').} \item{g}{a factor, \code{\link{GRP}} object, atomic vector (internally converted to factor) or a list of vectors / factors (internally converted to a \code{\link{GRP}} object) used to group \code{x}.} \item{w}{a numeric vector of (non-negative) weights, may contain missing values.} \item{TRA}{an integer or quoted operator indicating the transformation to perform: 1 - "replace_fill" | 2 - "replace" | 3 - "-" | 4 - "-+" | 5 - "/" | 6 - "\%" | 7 - "+" | 8 - "*" | 9 - "\%\%" | 10 - "-\%\%". See \code{\link{TRA}}.} \item{na.rm}{logical. Skip missing values in \code{x}. Defaults to \code{TRUE} and implemented at very little computational cost. If \code{na.rm = FALSE}, \code{NA} is treated as any other value.} \item{use.g.names}{logical. Make group-names and add to the result as names (default method) or row-names (matrix and data frame methods). No row-names are generated for \emph{data.table}'s.} \item{ties}{an integer or character string specifying the method to resolve ties between multiple possible modes i.e. multiple values with the maximum frequency or sum of weights: \tabular{lllll}{\emph{ Int. } \tab\tab \emph{ String } \tab\tab \emph{ Description } \cr 1 \tab\tab "first" \tab\tab take the first occurring mode. \cr 2 \tab\tab "min" \tab\tab take the smallest of the possible modes. \cr 3 \tab\tab "max" \tab\tab take the largest of the possible modes. \cr 4 \tab\tab "last" \tab\tab take the last occurring mode. \cr } \emph{Note:} \code{"min"/"max"} don't work with character data. For logical data \code{TRUE} will be chosen unless \code{ties = "min"}. See Details. } \item{drop}{\emph{matrix and data.frame method:} Logical. \code{TRUE} drops dimensions and returns an atomic vector if \code{g = NULL} and \code{TRA = NULL}.} \item{keep.group_vars}{\emph{grouped_df method:} Logical. \code{FALSE} removes grouping variables after computation.} \item{keep.w}{\emph{grouped_df method:} Logical. Retain \code{sum} of weighting variable after computation (if contained in \code{grouped_df}).} \item{\dots}{arguments to be passed to or from other methods.} } \details{ \code{fmode} implements a pretty fast algorithm to find the statistical mode utilizing index- hashing implemented in the \code{Rcpp::sugar::IndexHash} class. %If all values are distinct, the first value is returned. If there are multiple distinct values having the top frequency, the first value established as having the top frequency when passing through the data from element 1 to element n is returned. If \code{na.rm = FALSE}, \code{NA} is not removed but treated as any other value (i.e. it's frequency is counted). If all values are \code{NA}, \code{NA} is always returned. The weighted mode is computed by summing up the weights for all distinct values and choosing the value with the largest sum. If \code{na.rm = TRUE}, missing values will be removed from both \code{x} and \code{w} i.e. utilizing only \code{x[complete.cases(x,w)]} and \code{w[complete.cases(x,w)]}. It is possible that multiple values have the same mode (the maximum frequency or sum of weights). Typical cases are simply when all values are either all the same or all distinct. In such cases, the default option \code{ties = "first"} returns the first occurring value in the data reaching the maximum frequency count or sum of weights. For example in a sample \code{x = c(1, 3, 2, 2, 4, 4, 1, 7)}, the first mode is 2 as \code{fmode} goes through the data from left to right. \code{ties = "last"} on the other hand gives 1. It is also possible to take the minimum or maximum mode, i.e. \code{fmode(x, ties = "min")} returns 1, and \code{fmode(x, ties = "max")} returns 4. It should be noted that options \code{ties = "min"} and \code{ties = "max"} give unintuitive results for character data (no strict alphabetic sorting, similar to using \code{<} and \code{>} to compare character values in R). These options are also best avoided if missing values are counted (\code{na.rm = FALSE}) since no proper logical comparison with missing values is possible: With numeric data it depends, since in C++ any comparison with \code{NA_real_} evaluates to \code{FALSE}, \code{NA_real_} is chosen as the min or max mode only if it is also the first mode, and never otherwise. For integer data, \code{NA_integer_} is stored as the smallest integer in C++, so it will always be chosen as the min mode and never as the max mode. For character data, \code{NA_character_} is stored as the string \code{"NA"} in C++ and thus the behavior depends on the other character content. \code{fmode} also implements a fast method for logical values which does not support the options \code{"first"/"last"} i.e. \code{TRUE} is returned unless \code{ties = "min"}. This all seamlessly generalizes to grouped computations, which are performed by mapping the data to a sparse-array (except for logical values) and then going group-by group. \code{fmode} preserves all the attributes of the objects it is applied to (apart from names or row-names which are adjusted as necessary in grouped operations). If a data frame is passed to \code{fmode} and \code{drop = TRUE} (the default), \code{\link{unlist}} will be called on the result, which might not be sensible depending on the data at hand. } \value{ The (\code{w} weighted) statistical mode of \code{x}, grouped by \code{g}, or (if \code{\link{TRA}} is used) \code{x} transformed by its mode, grouped by \code{g}. See also Details. } \seealso{ \code{\link{fmean}}, \code{\link{fmedian}}, \link[=fast-statistical-functions]{Fast Statistical Functions}, \link[=collapse-documentation]{Collapse Overview} } \examples{ x <- c(1, 3, 2, 2, 4, 4, 1, 7, NA, NA, NA) fmode(x) # Default is ties = "first" fmode(x, ties = "last") fmode(x, ties = "min") fmode(x, ties = "max") fmode(x, na.rm = FALSE) # Here NA is the mode, regardless of ties option fmode(x[-length(x)], na.rm = FALSE) # Not anymore.. ## World Development Data attach(wlddev) ## default vector method fmode(PCGDP) # Numeric mode head(fmode(PCGDP, iso3c)) # Grouped numeric mode head(fmode(PCGDP, iso3c, LIFEEX)) # Grouped and weighted numeric mode fmode(region) # Factor mode fmode(date) # Date mode (defaults to first value since panel is balanced) fmode(country) # Character mode (also defaults to first value) fmode(OECD) # Logical mode # ..all the above can also be performed grouped and weighted ## matrix method m <- qM(airquality) fmode(m) fmode(m, na.rm = FALSE) # NA frequency is also counted fmode(m, airquality$Month) # Groupwise fmode(m, w = airquality$Day) # Weighted: Later days in the month are given more weight fmode(m>50, airquality$Month) # Groupwise logical mode # etc.. ## data.frame method fmode(wlddev) # Calling unlist -> coerce to character vector fmode(wlddev, drop = FALSE) # Gives one row head(fmode(wlddev, iso3c)) # Grouped mode head(fmode(wlddev, iso3c, LIFEEX)) # Grouped and weighted mode detach(wlddev) } \keyword{univar} \keyword{manip} collapse/man/collapse-renamed.Rd0000644000176200001440000000357514166145044016350 0ustar liggesusers\name{collapse-renamed} \alias{collapse-renamed} \alias{fNobs} \alias{fNobs.default} \alias{fNobs.matrix} \alias{fNobs.data.frame} \alias{fNobs.grouped_df} \alias{fNdistinct} \alias{fNdistinct.default} \alias{fNdistinct.matrix} \alias{fNdistinct.data.frame} \alias{fNdistinct.grouped_df} \alias{pwNobs} \alias{fHDwithin} \alias{fHDwithin.default} \alias{fHDwithin.matrix} \alias{fHDwithin.data.frame} \alias{fHDwithin.pseries} \alias{fHDwithin.pdata.frame} \alias{fHDwithin.grouped_df} \alias{fHDbetween} \alias{fHDbetween.default} \alias{fHDbetween.matrix} \alias{fHDbetween.data.frame} \alias{fHDbetween.pseries} \alias{fHDbetween.pdata.frame} \alias{fHDbetween.grouped_df} \alias{as.factor_GRP} \alias{as.factor_qG} \alias{is.GRP} \alias{is.qG} \alias{is.unlistable} \alias{is.categorical} \alias{is.Date} \alias{as.character_factor} \alias{as.numeric_factor} \alias{Date_vars} \alias{Date_vars<-} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Renamed Functions } \description{ These functions were renamed moving from collapse 1.5.3 to 1.6.0 to make the namespace more consistent. With collapse 1.7.0 I have depreciated all methods to \code{fNobs}, \code{fNdistinct}, \code{fHDbetween} and \code{fHDwithin}. The S3 generics and the other functions will be depreciated in 2023 for the earliest. These all now give a message reminding you not to use them in fresh code. } \section{Renaming}{\preformatted{ fNobs -> fnobs fNdistinct -> fndistinct pwNobs -> pwnobs fHDwithin -> fhdwithin fHDbetween -> fhdbetween as.factor_GRP -> as_factor_GRP as.factor_qG -> as_factor_qG is.GRP -> is_GRP is.qG -> is_qG is.unlistable -> is_unlistable is.categorical -> is_categorical is.Date -> is_date as.numeric_factor -> as_numeric_factor as.character_factor -> as_character_factor Date_vars -> date_vars `Date_vars<-` -> `date_vars<-` } } collapse/man/fprod.Rd0000644000176200001440000001267314175334642014252 0ustar liggesusers\name{fprod} \alias{fprod} \alias{fprod.default} \alias{fprod.matrix} \alias{fprod.data.frame} \alias{fprod.grouped_df} \title{Fast (Grouped, Weighted) Product for Matrix-Like Objects} % Vectors, Matrix and Data Frame Columns} \description{ \code{fprod} is a generic function that computes the (column-wise) product of all values in \code{x}, (optionally) grouped by \code{g} and/or weighted by \code{w}. The \code{\link{TRA}} argument can further be used to transform \code{x} using its (grouped, weighted) product. } \usage{ fprod(x, \dots) \method{fprod}{default}(x, g = NULL, w = NULL, TRA = NULL, na.rm = TRUE, use.g.names = TRUE, \dots) \method{fprod}{matrix}(x, g = NULL, w = NULL, TRA = NULL, na.rm = TRUE, use.g.names = TRUE, drop = TRUE, \dots) \method{fprod}{data.frame}(x, g = NULL, w = NULL, TRA = NULL, na.rm = TRUE, use.g.names = TRUE, drop = TRUE, \dots) \method{fprod}{grouped_df}(x, w = NULL, TRA = NULL, na.rm = TRUE, use.g.names = FALSE, keep.group_vars = TRUE, keep.w = TRUE, \dots) } \arguments{ \item{x}{a numeric vector, matrix, data frame or grouped data frame (class 'grouped_df').} \item{g}{a factor, \code{\link{GRP}} object, atomic vector (internally converted to factor) or a list of vectors / factors (internally converted to a \code{\link{GRP}} object) used to group \code{x}.} \item{w}{a numeric vector of (non-negative) weights, may contain missing values.} \item{TRA}{an integer or quoted operator indicating the transformation to perform: 1 - "replace_fill" | 2 - "replace" | 3 - "-" | 4 - "-+" | 5 - "/" | 6 - "\%" | 7 - "+" | 8 - "*" | 9 - "\%\%" | 10 - "-\%\%". See \code{\link{TRA}}.} \item{na.rm}{logical. Skip missing values in \code{x}. Defaults to \code{TRUE} and implemented at very little computational cost. If \code{na.rm = FALSE} a \code{NA} is returned when encountered.} \item{use.g.names}{logical. Make group-names and add to the result as names (default method) or row-names (matrix and data frame methods). No row-names are generated for \emph{data.table}'s.} \item{drop}{\emph{matrix and data.frame method:} Logical. \code{TRUE} drops dimensions and returns an atomic vector if \code{g = NULL} and \code{TRA = NULL}.} \item{keep.group_vars}{\emph{grouped_df method:} Logical. \code{FALSE} removes grouping variables after computation.} \item{keep.w}{\emph{grouped_df method:} Logical. Retain product of weighting variable after computation (if contained in \code{grouped_df}).} \item{\dots}{arguments to be passed to or from other methods.} } \details{ Non-grouped product computations internally utilize long-doubles in C++, for additional numeric precision. Missing-value removal as controlled by the \code{na.rm} argument is done very efficiently by simply skipping them in the computation (thus setting \code{na.rm = FALSE} on data with no missing values doesn't give extra speed). Large performance gains can nevertheless be achieved in the presence of missing values if \code{na.rm = FALSE}, since then the corresponding computation is terminated once a \code{NA} is encountered and \code{NA} is returned (unlike \code{\link{prod}} which just runs through without any checks). This all seamlessly generalizes to grouped computations, which are performed in a single pass (without splitting the data) and therefore extremely fast. The weighted product is computed as \code{prod(x * w)}. If \code{na.rm = TRUE}, missing values will be removed from both \code{x} and \code{w} i.e. utilizing only \code{x[complete.cases(x,w)]} and \code{w[complete.cases(x,w)]}. When applied to data frames with groups or \code{drop = FALSE}, \code{fprod} preserves all column attributes (such as variable labels) but does not distinguish between classed and unclassed objects. The attributes of the data frame itself are also preserved. } \value{ The (\code{w} weighted) product of \code{x}, grouped by \code{g}, or (if \code{\link{TRA}} is used) \code{x} transformed by its product, grouped by \code{g}. } \seealso{ \code{\link{fsum}}, \link[=fast-statistical-functions]{Fast Statistical Functions}, \link[=collapse-documentation]{Collapse Overview} } \examples{ ## default vector method mpg <- mtcars$mpg fprod(mpg) # Simple product fprod(mpg, w = mtcars$hp) # Weighted product fprod(mpg, TRA = "/") # Simple transformation: Divide by product fprod(mpg, mtcars$cyl) # Grouped product fprod(mpg, mtcars$cyl, mtcars$hp) # Weighted grouped product fprod(mpg, mtcars[c(2,8:9)]) # More groups.. g <- GRP(mtcars, ~ cyl + vs + am) # Precomputing groups gives more speed ! fprod(mpg, g) fprod(mpg, g, TRA = "/") # Groupwise divide by product ## data.frame method fprod(mtcars) head(fprod(mtcars, TRA = "/")) fprod(mtcars, g) fprod(mtcars, g, use.g.names = FALSE) # No row-names generated ## matrix method m <- qM(mtcars) fprod(m) head(fprod(m, TRA = "/")) fprod(m, g) # etc.. \donttest{ % No code relying on suggested package ## method for grouped data frames - created with dplyr::group_by or fgroup_by library(dplyr) mtcars \%>\% group_by(cyl,vs,am) \%>\% fprod(hp) # Weighted grouped product mtcars \%>\% fgroup_by(cyl,vs,am) \%>\% fprod(hp) # Equivalent and faster mtcars \%>\% fgroup_by(cyl,vs,am) \%>\% fprod(TRA = "/") mtcars \%>\% fgroup_by(cyl,vs,am) \%>\% fselect(mpg) \%>\% fprod() } } \keyword{univar} \keyword{manip} collapse/man/collapse-documentation.Rd0000644000176200001440000003242514170151713017575 0ustar liggesusers\name{collapse-documentation} \alias{A0-collapse-documentation} \alias{collapse-documentation} \alias{.COLLAPSE_TOPICS} \alias{.COLLAPSE_ALL} \alias{.COLLAPSE_GENERIC} \alias{.COLLAPSE_DATA} % \docType{package} \title{Collapse Documentation & Overview} \description{ The following table fully summarizes the contents of \emph{\link{collapse}}. The documentation is structured hierarchically: This is the main overview page, linking to topical overview pages and associated function pages (unless functions are documented on the topic page). % Calling \code{?FUN} brings up the documentation page for \code{FUN}, with links to associated topic pages and closely related functions. % Calling \code{help(FUN)} still brings up the right / most relevant page documenting the function. % % Functions with separate documentation entries (apart from the topic page) are linked. % Each topic further has it's own overview page in the documentation. % , linking to functions % , i.e. only functions with separate pages are linked here } \section{Topics and Functions}{ \tabular{lllll}{ \emph{ Topic } \tab\tab \emph{ Main Features / Keywords} \tab\tab \emph{ Functions } \cr % \Sexpr{"\u200B"} \Sexpr{"\u200B"} \link[=fast-statistical-functions]{Fast Statistical Functions} \tab\tab Fast (grouped and weighted) statistical functions for vector, matrix, data frame and grouped data frames (class 'grouped_df', \emph{dplyr} compatible). \tab\tab \code{\link{fsum}}, \code{\link{fprod}}, \code{\link{fmean}}, \code{\link{fmedian}}, \code{\link{fmode}}, \code{\link{fvar}}, \code{\link{fsd}}, \code{\link{fmin}}, \code{\link{fmax}}, \code{\link{fnth}}, \code{\link{ffirst}}, \code{\link{flast}}, \code{\link{fnobs}}, \code{\link{fndistinct}} \cr \cr \cr \link[=fast-grouping-ordering]{Fast Grouping and Ordering} \tab\tab Fast (ordered) groupings from vectors, data frames, lists. 'GRP' objects are extremely efficient inputs for programming with \emph{collapse}'s fast functions. \code{fgroup_by} can attach them to a data frame, for fast dplyr-style grouped computations. Fast splitting of vectors based on 'GRP' objects, fast radix-sort based ordering and hash-based grouping (the workhorses behind \code{GRP}), fast unique values/rows, factor generation, vector grouping, interactions, generalized run-length type grouping and grouping of time-sequences. % (to optimize different / repeated computations over the same groups). \tab\tab \code{\link{GRP}}, \code{\link{as_factor_GRP}}, \code{\link{GRPnames}}, \code{\link{is_GRP}}, \code{\link{gsplit}}, \code{\link{fgroup_by}}, \code{\link{fgroup_vars}}, \code{\link{fungroup}}, \code{\link[=radixorder]{radixorder(v)}}, \code{\link{group}}, \code{\link{funique}}, \code{\link{qF}}, \code{\link{qG}}, \code{\link{is_qG}}, \code{\link{fdroplevels}}, \code{\link{finteraction}}, \code{\link{groupid}}, \code{\link{seqid}} \cr \cr \cr % \code{GRP} creates 'GRP' objects, and \code{fgroup_by} can be used to attach them to a data frame (analogous to \code{dplyr::group_by}) % (speed about 2x '[' for selecting and 4x '[<-' for replacing). %, get data, variables names, variable indices \link[=fast-data-manipulation]{Fast Data Manipulation} \tab\tab Fast and flexible select, subset, summarise, mutate/transform, sort/reorder, rename and relabel data. In addition a set of (standard evaluation) functions for fast selecting, replacing or adding data frame columns, including shortcuts to select and replace variables by data type. \tab\tab \code{\link[=fselect]{fselect(<-)}}, \code{\link[=fsubset]{fsubset/ss}}, \code{\link{fsummarise}}, \code{\link{fmutate}}, \code{\link{across}}, \code{\link[=ftransform]{(f/set)transform(v)(<-)}}, \code{\link[=fcompute]{fcompute(v)}}, \code{\link[=roworder]{roworder(v)}}, \code{\link[=colorder]{colorder(v)}}, \code{\link[=frename]{(f/set)rename}}, \code{\link[=relabel]{(set)relabel}}, \code{\link[=get_vars]{get_vars(<-)}}, \code{\link[=add_vars]{add_vars(<-)}}, \code{\link[=num_vars]{num_vars(<-)}}, \code{\link[=cat_vars]{cat_vars(<-)}}, \code{\link[=char_vars]{char_vars(<-)}}, \code{\link[=fact_vars]{fact_vars(<-)}}, \code{\link[=logi_vars]{logi_vars(<-)}}, \code{\link[=date_vars]{date_vars(<-)}} \cr \cr \cr \link[=quick-conversion]{Quick Data Conversion} \tab\tab Quick conversions: data.frame <> data.table <> tibble | matrix <> list, data.frame, data.table (row- or column- wise), tibble | array > matrix, data.frame, data.table, tibble | list > data.frame, data.table, tibble | vector > factor, matrix, data.frame, data.table, tibble; and converting factors / all factor columns. \tab\tab \code{qDF}, \code{qDT}, \code{qTBL}, \code{qM}, \code{qF}, \code{mrtl}, \code{mctl}, \code{as_numeric_factor}, \code{as_character_factor} \cr \cr \cr \link[=advanced-aggregation]{Advanced Data Aggregation} \tab\tab Fast and easy (weighted and parallelized) aggregation of multi-type data, with (multiple) functions applied to numeric and categorical columns. Also supports fully customized aggregation tasks mapping functions to columns + renaming. \tab\tab \code{collap(v/g)} \cr \cr \cr \link[=data-transformations]{Data Transformations} \tab\tab Fast row- and column- arithmetic and (object preserving) apply functionality for vectors, matrices and data frames. Fast (grouped) replacing and sweeping of statistics and (grouped and weighted) scaling / standardizing, (higher-dimensional) within- and between-transformations (i.e. centering and averaging), linear prediction and partialling out. Additional methods for grouped_df (\emph{dplyr}) and pseries, pdata.frame (\emph{plm}). \tab\tab \code{\link[=arithmetic]{\%(r/c)r\%}}, \code{\link[=arithmetic]{\%(r/c)(+/-/*//)\%}}, \code{\link{dapply}}, \code{\link{BY}}, \code{\link{TRA}}, \code{\link[=fscale]{fscale/STD}}, \code{\link[=fbetween]{fbetween/B}}, \code{\link[=fwithin]{fwithin/W}}, \code{\link[=HDB]{fhdbetween/HDB}}, \code{\link[=HDW]{fhdwithin/HDW}} \cr \cr \cr Linear Models \tab\tab Fast (weighted) linear model fitting with 6 different solvers and a fast F-test to test exclusion restrictions on linear models with (large) factors. \tab\tab \code{\link{flm}}, \code{\link{fFtest}} \cr \cr \cr \link[=time-series-panel-series]{Time Series and Panel Series} \tab\tab Fast (sequences of) lags / leads and (lagged / leaded and iterated) differences, quasi-differences, (quasi-) log-differences and (compounded) growth rates on (unordered, irregular) time series and panel data. Flexible cumulative summations. Panel data to (ts-)array conversions. Multivariate panel- auto-, partial- and cross-correlation functions. Additional methods for grouped_df (\emph{dplyr}) and pseries, pdata.frame (\emph{plm}). \tab\tab \code{\link[=flag]{flag/L/F}}, \code{\link[=fdiff]{fdiff/D/Dlog}}, \code{\link[=fgrowth]{fgrowth/G}}, \code{\link{fcumsum}}, \code{\link{psmat}}, \code{\link{psacf}}, \code{\link{pspacf}}, \code{\link{psccf}} \cr \link[=list-processing]{List Processing} \tab\tab (Recursive) list search and identification, search and extract list-elements / list-subsetting, splitting, list-transpose, apply functions to lists of data frames / data objects, and (fast) generalized recursive row-binding / unlisting in 2-dimensions / to data frame. \tab\tab \code{\link{is_unlistable}}, \code{\link{ldepth}}, \code{\link{has_elem}}, \code{\link{get_elem}}, \code{\link[=atomic_elem]{atomic_elem(<-)}}, \code{\link[=list_elem]{list_elem(<-)}}, \code{\link{reg_elem}}, \code{\link{irreg_elem}}, \code{\link{rsplit}}, \code{\link{t_list}}, \code{\link{rapply2d}}, \code{\link{unlist2d}} \cr \link[=summary-statistics]{Summary Statistics} \tab\tab Fast (grouped and weighted), summary statistics for cross-sectional and complex multilevel / panel data. Efficient detailed description of data frame. Fast check of variation in data (within groups / dimensions). (Weighted) pairwise correlations and covariances (with observation count, p-value and pretty printing), pairwise observation count. Some additional methods for grouped_df (\emph{dplyr}) pseries and pdata.frame (\emph{plm}). \tab\tab \code{\link{qsu}}, \code{\link{descr}}, \code{\link{varying}}, \code{\link{pwcor}}, \code{\link{pwcov}}, \code{\link{pwnobs}} \cr \cr \cr \cr % (within- and between-groups) ww %Visual Data Exploration \tab\tab Pretty (grouped, weighted, and panel-decomposed) histogram-, density-, scatter- and correlation plots \tab\tab histplot, densplot, scplot, corplot \cr \link[=recode-replace]{Recode and Replace Values} \tab\tab Recode multiple values (exact or regex matching) and replace \code{NaN/Inf/-Inf} and outliers (according to 1- or 2-sided threshold or standard-deviations) in vectors, matrices or data frames. Insert a value at arbitrary positions into vectors, matrices or data frames. \tab\tab \code{recode_num}, \code{recode_char}, \code{replace_NA}, \code{replace_Inf}, \code{replace_outliers}, \code{\link{pad}} \cr \cr \cr \link[=efficient-programming]{(Memory) Efficient Programming} \tab\tab Efficient comparisons of a vector/matrix with a value, and replacing values/rows in vector/matrix/DF (all avoiding the generation of logical vectors or subsets), faster generation of initialized vectors, and fast mathematical operations on vectors/matrices/DF's with no copies at all. Fast missing value detection, (random) insertion and removal, fast data lengths and C storage types, faster \code{nlevels} for factors, fast \code{nrow}, \code{ncol}, \code{dim} (for data frames) and \code{seq_along} rows or columns. Choleski (fast) inverse of symmetric PD matrix. \tab\tab \code{anyv}, \code{allv}, \code{allNA}, \code{whichv}, \code{whichNA}, \code{\%==\%}, \code{\%!=\%}, \code{copyv}, \code{setv}, \code{alloc}, \code{setop}, \code{\%+=\%}, \code{\%-=\%}, \code{\%*=\%}, \code{\%/=\%}, \code{missing_cases}, \code{na_insert}, \code{na_rm}, \code{na_omit}, \code{vlengths}, \code{vtypes}, \code{fnlevels}, \code{fnrow}, \code{fncol}, \code{fdim}, \code{seq_row}, \code{seq_col}, \code{cinv} \cr \cr \cr \link[=small-helpers]{Small (Helper) Functions} \tab\tab Multiple-assignment, non-standard concatenation, set and extract variable labels, extract variable classes, display variable names and labels together, add / remove prefix or postfix to / from column names, not-in operator, matching with error message for non-matched, check exact or near / numeric equality of multiple objects or of all elements in a list, return object with dimnames, row- or colnames efficiently set, or with all attributes removed, C-level functions to set and duplicate / copy attributes, identify categorical and date(-time) objects. \tab\tab \code{massign}, \code{\%=\%}, \code{.c}, \code{vlabels(<-)}, \code{setLabels}, \code{vclasses}, \code{namlab}, \code{add_stub}, \code{rm_stub}, \code{\%!in\%}, \code{ckmatch}, \code{all_identical}, \code{all_obj_equal}, \code{setDimnames}, \code{setRownames}, \code{setColnames}, \code{unattrib}, \code{setAttrib}, \code{copyAttrib}, \code{copyMostAttrib}, \code{is_categorical}, \code{is_date} \cr \cr \cr Data and Global Macros \tab\tab Groningen Growth and Development Centre 10-Sector Database, World Bank World Development dataset, and some global macros containing links to the topical documentation pages (including this page), all exported objects (excluding exported S3 methods), all generic functions, the 2 datasets, all fast functions, all fast statistical (scalar-valued) functions, and all transformation operators (these are not infix functions but function shortcuts resembling operators in a statistical sense, such as the lag/lead operators \code{L}/\code{F}, both wrapping \code{flag}, see \code{\link{.OPERATOR_FUN}}). \tab\tab \code{\link{GGDC10S}, \link{wlddev}, .COLLAPSE_TOPICS, .COLLAPSE_ALL, .COLLAPSE_GENERIC, .COLLAPSE_DATA, .FAST_FUN, .FAST_STAT_FUN, .OPERATOR_FUN} \cr\cr\cr } } \section{\link[=collapse-options]{Package Options}}{ \itemize{ \item \code{options("collapse_unused_arg_action")} sets the action taken by generic statistical functions when unknown arguments are passed to a method. The default is \code{"warning"}. \item \code{options("collapse_mask")} can be used to export copies of functions starting with \code{"f"} when loading the package, removing the leading \code{"f"} (e.g. also exporting \code{subset} as a clone to \code{fsubset}). This will mask like-named base R or \emph{dplyr} functions. \item \code{options("collapse_F_to_FALSE")} can also be called before loading the package to set the lead operator \code{F} in the package to \code{FALSE}, to avoid problems with \code{base::F}. \item When manipulating \emph{data.table}'s, you can set how many columns \emph{collapse} functions overallocate with \code{option("collapse_DT_alloccol")}. The default is \code{100L}. } } \section{Details}{ The added top-level documentation infrastructure in \emph{collapse} allows you to effectively navigate the package. % (as in other commercial software documentations like Mathematica). Calling \code{?FUN} brings up the documentation page documenting the function, which contains links to associated topic pages and closely related functions. You can also call topical documentation pages directly from the console. The links to these pages are contained in the global macro \code{.COLLAPSE_TOPICS} (e.g. calling \code{help(.COLLAPSE_TOPICS[1])} brings up this page). } \author{ \bold{Maintainer}: Sebastian Krantz \email{sebastian.krantz@graduateinstitute.ch} } \seealso{ \link{collapse-package} } % \keyword{package} \keyword{documentation} collapse/man/is_unlistable.Rd0000644000176200001440000000305514176642305015766 0ustar liggesusers\name{is_unlistable} \alias{is_unlistable} \title{ Unlistable Lists } \description{ A (nested) list with atomic objects in all final nodes of the list-tree is unlistable - checked with \code{is_unlistable}. } \usage{ is_unlistable(l, DF.as.list = FALSE) } %- maybe also 'usage' for other objects documented here. \arguments{ % \item{x}{an R object.} \item{l}{a list.} \item{DF.as.list}{logical. \code{TRUE} treats data frames like (sub-)lists; \code{FALSE} like atomic elements.} } \details{ \code{is_unlistable} with \code{DF.as.list = TRUE} is defined as \code{all(rapply(l, is.atomic))}, whereas \code{DF.as.list = FALSE} yields checking using \code{all(unlist(rapply2d(l, function(x) is.atomic(x) || is.list(x)), use.names = FALSE))}, assuming that data frames are lists composed of atomic elements. If \code{l} contains data frames, the latter can be a lot faster than applying \code{is.atomic} to every data frame column. } \value{ \code{logical(1)} - \code{TRUE} or \code{FALSE}. } % \references{ %% ~put references to the literature/web site here ~ % } % \author{ %% ~~who you are~~ % } % \note{ %% ~~further notes~~ % } %% ~Make other sections like Warning with \section{Warning }{\dots.} ~ \seealso{ \code{\link{ldepth}}, \code{\link{has_elem}}, \link[=list-processing]{List Processing}, \link[=collapse-documentation]{Collapse Overview} } \examples{ l <- list(1, 2, list(3, 4, "b", FALSE)) is_unlistable(l) l <- list(1, 2, list(3, 4, "b", FALSE, e ~ b)) is_unlistable(l) } \keyword{list} \keyword{utilities} collapse/man/fgrowth.Rd0000644000176200001440000002401314175334642014607 0ustar liggesusers\name{fgrowth} \alias{fgrowth} \alias{fgrowth.default} \alias{fgrowth.matrix} \alias{fgrowth.data.frame} \alias{fgrowth.list} \alias{fgrowth.pseries} \alias{fgrowth.pdata.frame} \alias{fgrowth.grouped_df} \alias{G} \alias{G.default} \alias{G.matrix} \alias{G.data.frame} \alias{G.list} \alias{G.pseries} \alias{G.pdata.frame} \alias{G.grouped_df} \title{ % Lagged and Iterated Fast Growth Rates for Time Series and Panel Data } \description{ \code{fgrowth} is a S3 generic to compute (sequences of) suitably lagged / leaded and iterated growth rates, obtained with via the exact method of computation of through log differencing. By default growth rates are provided in percentage terms, but any scale factor can be applied. The growth operator \code{G} is a parsimonious wrapper around \code{fgrowth}, and also provides more flexibility when applied to data frames. } \usage{ fgrowth(x, n = 1, diff = 1, \dots) G(x, n = 1, diff = 1, \dots) \method{fgrowth}{default}(x, n = 1, diff = 1, g = NULL, t = NULL, fill = NA, logdiff = FALSE, scale = 100, power = 1, stubs = TRUE, \dots) \method{G}{default}(x, n = 1, diff = 1, g = NULL, t = NULL, fill = NA, logdiff = FALSE, scale = 100, power = 1, stubs = TRUE, \dots) \method{fgrowth}{matrix}(x, n = 1, diff = 1, g = NULL, t = NULL, fill = NA, logdiff = FALSE, scale = 100, power = 1, stubs = length(n) + length(diff) > 2L, \dots) \method{G}{matrix}(x, n = 1, diff = 1, g = NULL, t = NULL, fill = NA, logdiff = FALSE, scale = 100, power = 1, stubs = TRUE, \dots) \method{fgrowth}{data.frame}(x, n = 1, diff = 1, g = NULL, t = NULL, fill = NA, logdiff = FALSE, scale = 100, power = 1, stubs = length(n) + length(diff) > 2L, \dots) \method{G}{data.frame}(x, n = 1, diff = 1, by = NULL, t = NULL, cols = is.numeric, fill = NA, logdiff = FALSE, scale = 100, power = 1, stubs = TRUE, keep.ids = TRUE, \dots) # Methods for compatibility with plm: \method{fgrowth}{pseries}(x, n = 1, diff = 1, fill = NA, logdiff = FALSE, scale = 100, power = 1, stubs = TRUE, \dots) \method{G}{pseries}(x, n = 1, diff = 1, fill = NA, logdiff = FALSE, scale = 100, power = 1, stubs = TRUE, \dots) \method{fgrowth}{pdata.frame}(x, n = 1, diff = 1, fill = NA, logdiff = FALSE, scale = 100, power = 1, stubs = length(n) + length(diff) > 2L, \dots) \method{G}{pdata.frame}(x, n = 1, diff = 1, cols = is.numeric, fill = NA, logdiff = FALSE, scale = 100, power = 1, stubs = TRUE, keep.ids = TRUE, \dots) # Methods for grouped data frame / compatibility with dplyr: \method{fgrowth}{grouped_df}(x, n = 1, diff = 1, t = NULL, fill = NA, logdiff = FALSE, scale = 100, power = 1, stubs = length(n) + length(diff) > 2L, keep.ids = TRUE, \dots) \method{G}{grouped_df}(x, n = 1, diff = 1, t = NULL, fill = NA, logdiff = FALSE, scale = 100, power = 1, stubs = TRUE, keep.ids = TRUE, \dots) } \arguments{ \item{x}{a numeric vector, matrix, data frame, panel series (\code{plm::pseries}), panel data frame (\code{plm::pdata.frame}) or grouped data frame (class 'grouped_df').} \item{n}{integer. A vector indicating the number of lags or leads.} \item{diff}{integer. A vector of integers > 1 indicating the order of taking growth rates, e.g. \code{diff = 2} means computing the growth rate of the growth rate.} \item{g}{a factor, \code{\link{GRP}} object, atomic vector (internally converted to factor) or a list of vectors / factors (internally converted to a \code{\link{GRP}} object) used to group \code{x}.} \item{by}{\emph{data.frame method}: Same as \code{g}, but also allows one- or two-sided formulas i.e. \code{~ group1} or \code{var1 + var2 ~ group1 + group2}. See Examples.} \item{t}{same input as \code{g/by}, to indicate the time-variable(s). For safe computation of growth rates on unordered time series and panels. Data Frame method also allows one-sided formula i.e. \code{~time}. grouped_df method supports lazy-evaluation i.e. \code{time} (no quotes).} \item{cols}{\emph{data.frame method}: Select columns to compute growth rates using a function, column names, indices or a logical vector. Default: All numeric variables. \emph{Note}: \code{cols} is ignored if a two-sided formula is passed to \code{by}.} \item{fill}{value to insert when vectors are shifted. Default is \code{NA}. } \item{logdiff}{logical. Compute log-difference growth rates instead of exact growth rates. See Details.} \item{scale}{logical. Scale factor post-applied to growth rates, default is 100 which gives growth rates in percentage terms. See Details.} \item{power}{numeric. Apply a power to annualize or compound growth rates e.g. \code{fgrowth(AirPassengers, 12, power = 1/12)} is equivalent to \code{((AirPassengers/flag(AirPassengers, 12))^(1/12)-1)*100}.} \item{stubs}{logical. \code{TRUE} will rename all computed columns by adding a prefix "L\code{n}G\code{diff}." / "F\code{n}G\code{diff}.", or "L\code{n}Dlog\code{diff}." / "F\code{n}Dlog\code{diff}." if \code{logdiff = TRUE}.} \item{keep.ids}{\emph{data.frame / pdata.frame / grouped_df methods}: Logical. Drop all panel-identifiers from the output (which includes all variables passed to \code{by} or \code{t}). \emph{Note}: For grouped / panel data frames identifiers are dropped, but the 'groups' / 'index' attributes are kept.} \item{\dots}{arguments to be passed to or from other methods.} } \details{ \code{fgrowth/G} by default computes exact growth rates using \code{repeat(diff) ((x[i]/x[i-n])^power - 1)*scale}, and, if \code{logdiff = TRUE} approximate growth rates using \code{repeat(diff) log(x[i]/x[i-n])*scale}. So for \code{diff > 1} it computes growth rate of growth rates etc.. For further details see the help pages for \code{\link{fdiff}} and \code{\link{flag}}. } \value{ \code{x} where the growth rate was taken \code{diff} times using lags \code{n} of itself, scaled by \code{scale}. Computations can be grouped by \code{g/by} and/or ordered by \code{t}. See Details and Examples. } \seealso{ \code{\link[=flag]{flag/L/F}}, \code{\link[=fdiff]{fdiff/D/Dlog}}, \link[=time-series-panel-series]{Time Series and Panel Series}, \link[=collapse-documentation]{Collapse Overview} } \examples{ ## Simple Time Series: AirPassengers G(AirPassengers) # Growth rate, same as fgrowth(AirPassengers) G(AirPassengers, logdiff = TRUE) # Log-difference G(AirPassengers, 1, 2) # Growth rate of growth rate G(AirPassengers, 12) # Seasonal growth rate (data is monthly) head(G(AirPassengers, -2:2, 1:3)) # Sequence of leaded/lagged and iterated growth rates # let's do some visual analysis plot(G(AirPassengers, c(0, 1, 12))) plot(stl(window(G(AirPassengers, 12), # Taking seasonal growth rate removes most seasonal variation 1950), "periodic")) ## Time Series Matrix of 4 EU Stock Market Indicators, recorded 260 days per year plot(G(EuStockMarkets,c(0,260))) # Plot series and annual growth rates summary(lm(L260G1.DAX ~., G(EuStockMarkets,260))) # Annual growth rate of DAX regressed on the # growth rates of the other indicators ## World Development Panel Data head(fgrowth(num_vars(wlddev), 1, 1, # Computes growth rates of numeric variables wlddev$country, wlddev$year)) # fgrowth requires external inputs.. head(G(wlddev, 1, 1, ~country, ~year)) # Growth of numeric variables, id's attached head(G(wlddev, 1, 1, ~country)) # Without t: Works because data is ordered head(G(wlddev, 1, 1, PCGDP + LIFEEX ~ country, ~year)) # Growth of GDP per Capita & Life Expectancy head(G(wlddev, 0:1, 1, ~ country, ~year, cols = 9:10)) # Same, also retaining original series head(G(wlddev, 0:1, 1, ~ country, ~year, 9:10, # Dropping id columns keep.ids = FALSE)) # Dynamic Panel Data Models: summary(lm(G(PCGDP,1,1,iso3c,year) ~ # GDP growth regressed on it's lagged level L(PCGDP,1,iso3c,year) + # and the growth rate of Life Expanctancy G(LIFEEX,1,1,iso3c,year), data = wlddev)) g = qF(wlddev$country) # Omitting t and precomputing g allows for a summary(lm(G(PCGDP,1,1,g) ~ L(PCGDP,1,g) + # bit more parsimonious specification G(LIFEEX,1,1,g), wlddev)) summary(lm(G1.PCGDP ~., # Now adding level and lagged level of L(G(wlddev,0:1,1, ~ country, ~year,9:10),0:1, # LIFEEX and lagged growth rates ~ country, ~year, keep.ids = FALSE)[-1])) \donttest{ % No code relying on suggested package ## Using plm can make things easier, but avoid attaching or 'with' calls: pwlddev <- plm::pdata.frame(wlddev, index = c("country","year")) head(G(pwlddev, 0:1, 1, 9:10)) # Again growth rates of LIFEEX and PCGDP PCGDP <- pwlddev$PCGDP # A panel-Series of GDP per Capita head(G(PCGDP)) # Growth rate of the panel series summary(lm(G1.PCGDP ~., # Running the dynamic model again -> data = L(G(pwlddev,0:1,1,9:10),0:1, # code becomes a bit simpler keep.ids = FALSE)[-1])) # One could be tempted to also do something like this, but THIS DOES NOT WORK!!: # -> a pseries is only created when subsetting the pdata.frame using $ or [[ summary(lm(G(PCGDP) ~ L(G(PCGDP,0:1)) + L(G(LIFEEX,0:1),0:1), pwlddev)) # To make it work, one needs to create pseries LIFEEX <- pwlddev$LIFEEX summary(lm(G(PCGDP) ~ L(G(PCGDP,0:1)) + L(G(LIFEEX,0:1),0:1))) # THIS WORKS ! ## Using dplyr: library(dplyr) wlddev \%>\% group_by(country) \%>\% select(PCGDP,LIFEEX) \%>\% fgrowth(0:1) # Adding growth rates wlddev \%>\% group_by(country) \%>\% select(year,PCGDP,LIFEEX) \%>\% fgrowth(0:1, t = year) # Also using t (safer) } } \keyword{manip} \keyword{ts} collapse/man/fmin_fmax.Rd0000644000176200001440000001255414175334642015102 0ustar liggesusers\name{fmin-fmax} \alias{fmax} \alias{fmax.default} \alias{fmax.matrix} \alias{fmax.data.frame} \alias{fmax.grouped_df} \alias{fmin} \alias{fmin.default} \alias{fmin.matrix} \alias{fmin.data.frame} \alias{fmin.grouped_df} \title{Fast (Grouped) Maxima and Minima for Matrix-Like Objects} % Vectors, Matrix and Data Frame Columns} \description{ \code{fmax} and \code{fmin} are generic functions that compute the (column-wise) maximum and minimum value of all values in \code{x}, (optionally) grouped by \code{g}. The \code{\link{TRA}} argument can further be used to transform \code{x} using its (grouped) maximum or minimum value. } \usage{ fmax(x, \dots) fmin(x, \dots) \method{fmax}{default}(x, g = NULL, TRA = NULL, na.rm = TRUE, use.g.names = TRUE, \dots) \method{fmin}{default}(x, g = NULL, TRA = NULL, na.rm = TRUE, use.g.names = TRUE, \dots) \method{fmax}{matrix}(x, g = NULL, TRA = NULL, na.rm = TRUE, use.g.names = TRUE, drop = TRUE, \dots) \method{fmin}{matrix}(x, g = NULL, TRA = NULL, na.rm = TRUE, use.g.names = TRUE, drop = TRUE, \dots) \method{fmax}{data.frame}(x, g = NULL, TRA = NULL, na.rm = TRUE, use.g.names = TRUE, drop = TRUE, \dots) \method{fmin}{data.frame}(x, g = NULL, TRA = NULL, na.rm = TRUE, use.g.names = TRUE, drop = TRUE, \dots) \method{fmax}{grouped_df}(x, TRA = NULL, na.rm = TRUE, use.g.names = FALSE, keep.group_vars = TRUE, \dots) \method{fmin}{grouped_df}(x, TRA = NULL, na.rm = TRUE, use.g.names = FALSE, keep.group_vars = TRUE, \dots) } \arguments{ \item{x}{a numeric vector, matrix, data frame or grouped data frame (class 'grouped_df').} \item{g}{a factor, \code{\link{GRP}} object, atomic vector (internally converted to factor) or a list of vectors / factors (internally converted to a \code{\link{GRP}} object) used to group \code{x}.} \item{TRA}{an integer or quoted operator indicating the transformation to perform: 1 - "replace_fill" | 2 - "replace" | 3 - "-" | 4 - "-+" | 5 - "/" | 6 - "\%" | 7 - "+" | 8 - "*" | 9 - "\%\%" | 10 - "-\%\%". See \code{\link{TRA}}.} \item{na.rm}{logical. Skip missing values in \code{x}. Defaults to \code{TRUE} and implemented at very little computational cost. If \code{na.rm = FALSE} a \code{NA} is returned when encountered.} \item{use.g.names}{logical. Make group-names and add to the result as names (default method) or row-names (matrix and data frame methods). No row-names are generated for \emph{data.table}'s.} \item{drop}{\emph{matrix and data.frame method:} Logical. \code{TRUE} drops dimensions and returns an atomic vector if \code{g = NULL} and \code{TRA = NULL}.} \item{keep.group_vars}{\emph{grouped_df method:} Logical. \code{FALSE} removes grouping variables after computation.} \item{\dots}{arguments to be passed to or from other methods.} } \details{ Missing-value removal as controlled by the \code{na.rm} argument is done at no extra cost since in C++ any logical comparison involving \code{NA} or \code{NaN} evaluates to \code{FALSE}. Large performance gains can nevertheless be achieved in the presence of missing values if \code{na.rm = FALSE}, since then the corresponding computation is terminated once a \code{NA} is encountered and \code{NA} is returned (unlike \code{\link{max}} and \code{\link{min}} which just run through without any checks). This all seamlessly generalizes to grouped computations, which are performed in a single pass (without splitting the data) and therefore extremely fast. When applied to data frames with groups or \code{drop = FALSE}, \code{fmax} and \code{fmin} preserve all column attributes (such as variable labels) but do not distinguish between classed and unclassed objects. The attributes of the data frame itself are also preserved. } \value{ \code{fmax} returns the maximum value of \code{x}, grouped by \code{g}, or (if \code{\link{TRA}} is used) \code{x} transformed by its maximum value, grouped by \code{g}. Analogous, \code{fmin} returns the minimum value \dots } \seealso{ \link[=fast-statistical-functions]{Fast Statistical Functions}, \link[=collapse-documentation]{Collapse Overview} } \examples{ ## default vector method mpg <- mtcars$mpg fmax(mpg) # Maximum value fmin(mpg) # Minimum value (all examples below use fmax but apply to fmin) fmax(mpg, TRA = "\%") # Simple transformation: Take percentage of maximum value fmax(mpg, mtcars$cyl) # Grouped maximum value fmax(mpg, mtcars[c(2,8:9)]) # More groups.. g <- GRP(mtcars, ~ cyl + vs + am) # Precomputing groups gives more speed ! fmax(mpg, g) fmax(mpg, g, TRA = "\%") # Groupwise percentage of maximum value fmax(mpg, g, TRA = "replace") # Groupwise replace by maximum value ## data.frame method fmax(mtcars) head(fmax(mtcars, TRA = "\%")) fmax(mtcars, g) fmax(mtcars, g, use.g.names = FALSE) # No row-names generated ## matrix method m <- qM(mtcars) fmax(m) head(fmax(m, TRA = "\%")) fmax(m, g) # etc.. \donttest{ % No code relying on suggested package ## method for grouped data frames - created with dplyr::group_by or fgroup_by library(dplyr) mtcars \%>\% group_by(cyl,vs,am) \%>\% fmax() mtcars \%>\% group_by(cyl,vs,am) \%>\% fmax("\%") mtcars \%>\% group_by(cyl,vs,am) \%>\% select(mpg) \%>\% fmax() } } \keyword{univar} \keyword{manip} collapse/man/groupid.Rd0000644000176200001440000000433414167156135014604 0ustar liggesusers\name{groupid} \alias{groupid} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Generate Run-Length Type Group-Id } \description{ \code{groupid} is an enhanced version of \code{data.table::rleid} for atomic vectors. It generates a run-length type group-id where consecutive identical values are assigned the same integer. It is a generalization as it can be applied to unordered vectors, generate group id's starting from an arbitrary value, and skip missing values. } \usage{ groupid(x, o = NULL, start = 1L, na.skip = FALSE, check.o = TRUE) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{x}{an atomic vector of any type. Attributes are not considered.} \item{o}{an (optional) integer ordering vector specifying the order by which to pass through \code{x}.} \item{start}{integer. The starting value of the resulting group-id. Default is starting from 1. For C++ programmers, starting from 0 could be a better choice. } \item{na.skip}{logical. Skip missing values i.e. if \code{TRUE} something like \code{groupid(c("a", NA, "a"))} gives \code{c(1, NA, 1)} whereas \code{FALSE} gives \code{c(1, 2, 3)}.} \item{check.o}{logical. Programmers option: \code{FALSE} prevents checking that each element of \code{o} is in the range \code{[1, length(x)]}, it only checks the length of \code{o}. This gives some extra speed, but will terminate R if any element of \code{o} is too large or too small. } } \value{ An integer vector of class 'qG'. See \code{\link{qG}}. } \seealso{ \code{\link{seqid}}, \code{\link{qG}}, \link[=fast-grouping-ordering]{Fast Grouping and Ordering}, \link[=collapse-documentation]{Collapse Overview} } \examples{ groupid(airquality$Month) groupid(airquality$Month, start = 0) groupid(wlddev$country)[1:100] ## Same thing since country is alphabetically ordered: (groupid is faster..) all.equal(groupid(wlddev$country), qG(wlddev$country, na.exclude = FALSE)) ## When data is unordered, group-id can be generated through an ordering.. uo <- order(rnorm(fnrow(airquality))) monthuo <- airquality$Month[uo] o <- order(monthuo) groupid(monthuo, o) identical(groupid(monthuo, o)[o], unattrib(groupid(airquality$Month))) } \keyword{manip} collapse/man/fvar_fsd.Rd0000644000176200001440000001776414175334642014740 0ustar liggesusers\name{fvar-fsd} \alias{fvar} \alias{fvar.default} \alias{fvar.matrix} \alias{fvar.data.frame} \alias{fvar.grouped_df} \alias{fsd} \alias{fsd.default} \alias{fsd.matrix} \alias{fsd.data.frame} \alias{fsd.grouped_df} \title{Fast (Grouped, Weighted) Variance and Standard Deviation for Matrix-Like Objects} % Vectors, Matrix and Data Frame Columns \description{ \code{fvar} and \code{fsd} are generic functions that compute the (column-wise) variance and standard deviation of \code{x}, (optionally) grouped by \code{g} and/or frequency-weighted by \code{w}. The \code{\link{TRA}} argument can further be used to transform \code{x} using its (grouped, weighted) variance/sd. } \usage{ fvar(x, \dots) fsd(x, \dots) \method{fvar}{default}(x, g = NULL, w = NULL, TRA = NULL, na.rm = TRUE, use.g.names = TRUE, stable.algo = TRUE, \dots) \method{fsd}{default}(x, g = NULL, w = NULL, TRA = NULL, na.rm = TRUE, use.g.names = TRUE, stable.algo = TRUE, \dots) \method{fvar}{matrix}(x, g = NULL, w = NULL, TRA = NULL, na.rm = TRUE, use.g.names = TRUE, drop = TRUE, stable.algo = TRUE, \dots) \method{fsd}{matrix}(x, g = NULL, w = NULL, TRA = NULL, na.rm = TRUE, use.g.names = TRUE, drop = TRUE, stable.algo = TRUE, \dots) \method{fvar}{data.frame}(x, g = NULL, w = NULL, TRA = NULL, na.rm = TRUE, use.g.names = TRUE, drop = TRUE, stable.algo = TRUE, \dots) \method{fsd}{data.frame}(x, g = NULL, w = NULL, TRA = NULL, na.rm = TRUE, use.g.names = TRUE, drop = TRUE, stable.algo = TRUE, \dots) \method{fvar}{grouped_df}(x, w = NULL, TRA = NULL, na.rm = TRUE, use.g.names = FALSE, keep.group_vars = TRUE, keep.w = TRUE, stable.algo = TRUE, \dots) \method{fsd}{grouped_df}(x, w = NULL, TRA = NULL, na.rm = TRUE, use.g.names = FALSE, keep.group_vars = TRUE, keep.w = TRUE, stable.algo = TRUE, \dots) } \arguments{ \item{x}{a numeric vector, matrix, data frame or grouped data frame (class 'grouped_df').} \item{g}{a factor, \code{\link{GRP}} object, atomic vector (internally converted to factor) or a list of vectors / factors (internally converted to a \code{\link{GRP}} object) used to group \code{x}.} \item{w}{a numeric vector of (non-negative) weights, may contain missing values.} \item{TRA}{an integer or quoted operator indicating the transformation to perform: 1 - "replace_fill" | 2 - "replace" | 3 - "-" | 4 - "-+" | 5 - "/" | 6 - "\%" | 7 - "+" | 8 - "*" | 9 - "\%\%" | 10 - "-\%\%". See \code{\link{TRA}}.} \item{na.rm}{logical. Skip missing values in \code{x}. Defaults to \code{TRUE} and implemented at very little computational cost. If \code{na.rm = FALSE} a \code{NA} is returned when encountered.} \item{use.g.names}{logical. Make group-names and add to the result as names (default method) or row-names (matrix and data frame methods). No row-names are generated for \emph{data.table}'s.} \item{drop}{\emph{matrix and data.frame method:} Logical. \code{TRUE} drops dimensions and returns an atomic vector if \code{g = NULL} and \code{TRA = NULL}.} \item{keep.group_vars}{\emph{grouped_df method:} Logical. \code{FALSE} removes grouping variables after computation.} \item{keep.w}{\emph{grouped_df method:} Logical. Retain summed weighting variable after computation (if contained in \code{grouped_df}).} \item{stable.algo}{logical. \code{TRUE} (default) use Welford's numerically stable online algorithm. \code{FALSE} implements a faster but numerically unstable one-pass method. See Details. } \item{\dots}{arguments to be passed to or from other methods.} } \details{ \emph{Welford's online algorithm} used by default to compute the variance is well described \href{https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance}{here} (the section \emph{Weighted incremental algorithm} also shows how the weighted variance is obtained by this algorithm). If \code{stable.algo = FALSE}, the variance is computed in one-pass as \code{(sum(x^2)-n*mean(x)^2)/(n-1)}, where \code{sum(x^2)} is the sum of squares from which the expected sum of squares \code{n*mean(x)^2} is subtracted, normalized by \code{n-1} (Bessel's correction). This is numerically unstable if \code{sum(x^2)} and \code{n*mean(x)^2} are large numbers very close together, which will be the case for large \code{n}, large \code{x}-values and small variances (catastrophic cancellation occurs, leading to a loss of numeric precision). Numeric precision is however still maximized through the internal use of long doubles in C++, and the fast algorithm can be up to 4-times faster compared to Welford's method. The weighted variance is computed with frequency weights as \code{(sum(x^2*w)-sum(w)*weighted.mean(x,w)^2)/(sum(w)-1)}. If \code{na.rm = TRUE}, missing values will be removed from both \code{x} and \code{w} i.e. utilizing only \code{x[complete.cases(x,w)]} and \code{w[complete.cases(x,w)]}. Missing-value removal as controlled by the \code{na.rm} argument is done very efficiently by simply skipping the values (thus setting \code{na.rm = FALSE} on data with no missing values doesn't give extra speed). Large performance gains can nevertheless be achieved in the presence of missing values if \code{na.rm = FALSE}, since then the corresponding computation is terminated once a \code{NA} is encountered and \code{NA} is returned. This all seamlessly generalizes to grouped computations, which are performed in a single pass (without splitting the data) and therefore extremely fast. When applied to data frames with groups or \code{drop = FALSE}, \code{fvar/fsd} preserves all column attributes (such as variable labels) but does not distinguish between classed and unclassed object (thus applying \code{fvar/fsd} to a factor column will give a 'malformed factor' error). The attributes of the data frame itself are also preserved. } \value{ \code{fvar} returns the variance of \code{x}, grouped by \code{g}, or (if \code{\link{TRA}} is used) \code{x} transformed by its variance, grouped by \code{g}. \code{fsd} computes the standard deviation of \code{x} in like manor. } \references{ Welford, B. P. (1962). Note on a method for calculating corrected sums of squares and products. \emph{Technometrics}. 4 (3): 419-420. doi:10.2307/1266577. } \seealso{ \link[=fast-statistical-functions]{Fast Statistical Functions}, \link[=collapse-documentation]{Collapse Overview} } \examples{ ## default vector method fvar(mtcars$mpg) # Simple variance (all examples also hold for fvar!) fsd(mtcars$mpg) # Simple standard deviation fsd(mtcars$mpg, w = mtcars$hp) # Weighted sd: Weighted by hp fsd(mtcars$mpg, TRA = "/") # Simple transformation: scaling (See also ?fscale) fsd(mtcars$mpg, mtcars$cyl) # Grouped sd fsd(mtcars$mpg, mtcars$cyl, mtcars$hp) # Grouped weighted sd fsd(mtcars$mpg, mtcars$cyl, TRA = "/") # Scaling by group fsd(mtcars$mpg, mtcars$cyl, mtcars$hp, "/") # Group-scaling using weighted group sds ## data.frame method fsd(iris) # This works, although 'Species' is a factor variable fsd(mtcars, drop = FALSE) # This works, all columns are numeric variables fsd(iris[-5], iris[5]) # By Species: iris[5] is still a list, and thus passed to GRP() fsd(iris[-5], iris[[5]]) # Same thing much faster: fsd recognizes 'Species' is a factor head(fsd(iris[-5], iris[[5]], TRA = "/")) # Data scaled by species (see also fscale) ## matrix method m <- qM(mtcars) fsd(m) fsd(m, mtcars$cyl) # etc.. \donttest{ % No code relying on suggested package ## method for grouped data frames - created with dplyr::group_by or fgroup_by library(dplyr) mtcars \%>\% group_by(cyl,vs,am) \%>\% fsd() mtcars \%>\% group_by(cyl,vs,am) \%>\% fsd(keep.group_vars = FALSE) # Remove grouping columns mtcars \%>\% group_by(cyl,vs,am) \%>\% fsd(hp) # Weighted by hp mtcars \%>\% group_by(cyl,vs,am) \%>\% fsd(hp, "/") # Weighted scaling transformation } } \keyword{univar} \keyword{manip} collapse/man/fndistinct.Rd0000644000176200001440000001125414176642305015276 0ustar liggesusers\name{fndistinct} \alias{fndistinct} \alias{fndistinct.default} \alias{fndistinct.matrix} \alias{fndistinct.data.frame} \alias{fndistinct.grouped_df} \title{Fast (Grouped) Distinct Value Count for Matrix-Like Objects} % Vectors, Matrix and Data Frame Columns} \description{ \code{fndistinct} is a generic function that (column-wise) computes the number of distinct values in \code{x}, (optionally) grouped by \code{g}. It is significantly faster than \code{length(unique(x))}. The \code{\link{TRA}} argument can further be used to transform \code{x} using its (grouped) distinct value count. } \usage{ fndistinct(x, \dots) \method{fndistinct}{default}(x, g = NULL, TRA = NULL, na.rm = TRUE, use.g.names = TRUE, \dots) \method{fndistinct}{matrix}(x, g = NULL, TRA = NULL, na.rm = TRUE, use.g.names = TRUE, drop = TRUE, \dots) \method{fndistinct}{data.frame}(x, g = NULL, TRA = NULL, na.rm = TRUE, use.g.names = TRUE, drop = TRUE, \dots) \method{fndistinct}{grouped_df}(x, TRA = NULL, na.rm = TRUE, use.g.names = FALSE, keep.group_vars = TRUE, \dots) } \arguments{ \item{x}{a vector, matrix, data frame or grouped data frame (class 'grouped_df').} \item{g}{a factor, \code{\link{GRP}} object, atomic vector (internally converted to factor) or a list of vectors / factors (internally converted to a \code{\link{GRP}} object) used to group \code{x}.} \item{TRA}{an integer or quoted operator indicating the transformation to perform: 1 - "replace_fill" | 2 - "replace" | 3 - "-" | 4 - "-+" | 5 - "/" | 6 - "\%" | 7 - "+" | 8 - "*" | 9 - "\%\%" | 10 - "-\%\%". See \code{\link{TRA}}.} \item{na.rm}{logical. \code{TRUE}: Skip missing values in \code{x} (faster computation). \code{FALSE}: Also consider 'NA' as one distinct value.} \item{use.g.names}{logical. Make group-names and add to the result as names (default method) or row-names (matrix and data frame methods). No row-names are generated for \emph{data.table}'s.} \item{drop}{\emph{matrix and data.frame method:} Logical. \code{TRUE} drops dimensions and returns an atomic vector if \code{g = NULL} and \code{TRA = NULL}.} \item{keep.group_vars}{\emph{grouped_df method:} Logical. \code{FALSE} removes grouping variables after computation.} \item{\dots}{arguments to be passed to or from other methods.} } \details{ \code{fndistinct} implements a fast algorithm to find the number of distinct values utilizing index- hashing implemented in the \code{Rcpp::sugar::IndexHash} class. If \code{na.rm = TRUE} (the default), missing values will be skipped yielding substantial performance gains in data with many missing values. If \code{na.rm = TRUE}, missing values will simply be treated as any other value and read into the hash-map. Thus with the former, a numeric vector \code{c(1.25,NaN,3.56,NA)} will have a distinct value count of 2, whereas the latter will return a distinct value count of 4. Grouped computations are performed by mapping the data to a sparse-array and then hash-mapping each group. This is often not much slower than using a larger hash-map for the entire data when \code{g = NULL}. \code{fndistinct} preserves all attributes of non-classed vectors / columns, and only the 'label' attribute (if available) of classed vectors / columns (i.e. dates or factors). When applied to data frames and matrices, the row-names are adjusted as necessary. } \value{ Integer. The number of distinct values in \code{x}, grouped by \code{g}, or (if \code{\link{TRA}} is used) \code{x} transformed by its distinct value count, grouped by \code{g}. } \seealso{ \code{\link{fnobs}}, \link[=fast-statistical-functions]{Fast Statistical Functions}, \link[=collapse-documentation]{Collapse Overview} } \examples{ ## default vector method fndistinct(airquality$Solar.R) # Simple distinct value count fndistinct(airquality$Solar.R, airquality$Month) # Grouped distinct value count ## data.frame method fndistinct(airquality) fndistinct(airquality, airquality$Month) fndistinct(wlddev) # Works with data of all types! head(fndistinct(wlddev, wlddev$iso3c)) ## matrix method aqm <- qM(airquality) fndistinct(aqm) # Also works for character or logical matrices fndistinct(aqm, airquality$Month) \donttest{ % No code relying on suggested package ## method for grouped data frames - created with dplyr::group_by or fgroup_by library(dplyr) airquality \%>\% group_by(Month) \%>\% fndistinct() wlddev \%>\% group_by(country) \%>\% select(PCGDP,LIFEEX,GINI,ODA) \%>\% fndistinct() } } \keyword{univar} \keyword{manip} collapse/man/fsummarise.Rd0000644000176200001440000001205414175334642015304 0ustar liggesusers\name{fsummarise} \alias{fsummarise} \alias{smr} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Fast Summarise } \description{ \code{fsummarize} is a much faster version of \code{dplyr::summarise}, when used together with the \link[=fast-statistical-functions]{Fast Statistical Functions}. } \usage{ fsummarise(.data, ..., keep.group_vars = TRUE) smr(.data, ..., keep.group_vars = TRUE) # Shortcut } %- maybe also 'usage' for other objects documented here. \arguments{ \item{.data}{ a (grouped) data frame or named list of columns. Grouped data can be created with \code{\link{fgroup_by}} or \code{dplyr::group_by}. } \item{\dots}{ name-value pairs of summary functions, or \code{\link{across}} statements. For fast performance use the \link[=fast-statistical-functions]{Fast Statistical Functions}. % The name will be the name of the variable in the result. Functions when applied to a vector need to return a scalar. } \item{keep.group_vars}{ logical. \code{FALSE} removes grouping variables after computation. } } \value{ If \code{.data} is grouped by \code{\link{fgroup_by}} or \code{dplyr::group_by}, the result is a data frame of the same class and attributes with rows reduced to the number of groups. If \code{.data} is not grouped, the result is a data frame of the same class and attributes with 1 row. } \note{ Since v1.7, \code{fsummarise} is fully featured, allowing expressions using functions and columns of the data as well as external scalar values (just like \code{dplyr::summarise}). \bold{NOTE} however that once a \link[=fast-statistical-functions]{Fast Statistical Function} is used, the execution will be vectorized instead of split-apply-combine computing over groups. Please see the first Example. } \seealso{ \code{\link{across}}, \code{\link{collap}}, \link[=fast-data-manipulation]{Data Frame Manipulation}, \link[=fast-statistical-functions]{Fast Statistical Functions}, \link[=collapse-documentation]{Collapse Overview} } \examples{ library(magrittr) # Note: Used because |> is not available on older R versions ## Since v1.7, fsummarise supports arbitrary expressions, and expressions ## containing fast statistical functions receive vectorized execution: # (a) This is an expression using base R functions which is executed by groups mtcars \%>\% fgroup_by(cyl) \%>\% fsummarise(res = mean(mpg) + min(qsec)) # (b) Here, the use of fmean causes the whole expression to be executed # in a vectorized way i.e. the expression is translated to something like # fmean(mpg, g = cyl) + min(mpg) and executed, thus the result is different # from (a), because the minimum is calculated over the entire sample mtcars \%>\% fgroup_by(cyl) \%>\% fsummarise(mpg = fmean(mpg) + min(qsec)) # (c) For fully vectorized execution, use fmin. This yields the same as (a) mtcars \%>\% fgroup_by(cyl) \%>\% fsummarise(mpg = fmean(mpg) + fmin(qsec)) # In across() statements it is fine to mix different functions, each will # be executed on its own terms (i.e. vectorized for fmean and standard for sum) mtcars \%>\% fgroup_by(cyl) \%>\% fsummarise(across(mpg:hp, list(fmean, sum))) # Note that this still detects fmean as a fast function, the names of the list # are irrelevant, but the function name must be typed or passed as a character vector, # Otherwise functions will be executed by groups e.g. function(x) fmean(x) won't vectorize mtcars \%>\% fgroup_by(cyl) \%>\% fsummarise(across(mpg:hp, list(mu = fmean, sum = sum))) # We can force none-vectorized execution by setting .apply = TRUE mtcars \%>\% fgroup_by(cyl) \%>\% fsummarise(across(mpg:hp, list(mu = fmean, sum = sum), .apply = TRUE)) # Another argument of across(): Order the result first by function, then by column mtcars \%>\% fgroup_by(cyl) \%>\% fsummarise(across(mpg:hp, list(mu = fmean, sum = sum), .transpose = FALSE)) #---------------------------------------------------------------------------- # Examples that also work for pre 1.7 versions # Simple use fsummarise(mtcars, mean_mpg = fmean(mpg), sd_mpg = fsd(mpg)) # Using base functions (not a big difference without groups) fsummarise(mtcars, mean_mpg = mean(mpg), sd_mpg = sd(mpg)) \donttest{ % No code relying on suggested package or base Pipe # Grouped use mtcars \%>\% fgroup_by(cyl) \%>\% fsummarise(mean_mpg = fmean(mpg), sd_mpg = fsd(mpg)) # This is still efficient but quite a bit slower on large data (many groups) mtcars \%>\% fgroup_by(cyl) \%>\% fsummarise(mean_mpg = mean(mpg), sd_mpg = sd(mpg)) # Weighted aggregation mtcars \%>\% fgroup_by(cyl) \%>\% fsummarise(w_mean_mpg = fmean(mpg, wt), w_sd_mpg = fsd(mpg, wt)) ## Can also group with dplyr::group_by, but at a conversion cost, see ?GRP library(dplyr) mtcars \%>\% group_by(cyl) \%>\% fsummarise(mean_mpg = fmean(mpg), sd_mpg = fsd(mpg)) # Again less efficient... mtcars \%>\% group_by(cyl) \%>\% fsummarise(mean_mpg = mean(mpg), sd_mpg = sd(mpg)) } } \keyword{manip} collapse/man/time-series-panel-series.Rd0000644000176200001440000000700314167157060017740 0ustar liggesusers\name{time-series-panel-series} % \name{Time Series and Panel Computations} \alias{A7-time-series-panel-series} \alias{time-series-panel-series} % \alias{tscomp} \title{Time Series and Panel Series} % \emph{collapse} \description{ \emph{collapse} provides the following functions to work with time-dependent data: \itemize{ \item \code{\link{flag}}, and the lag- and lead- operators \code{\link{L}} and \code{\link{F}} are S3 generics to efficiently compute sequences of \bold{lags and leads} on ordered or unordered regular / balanced or irregular / unbalanced time series and panel data. \item Similarly, \code{\link{fdiff}}, \code{\link{fgrowth}}, and the operators \code{\link{D}}, \code{\link{Dlog}} and \code{\link{G}} are S3 generics to efficiently compute sequences of suitably lagged / leaded and iterated \bold{differences, log-differences and growth rates}. \code{\link[=fdiff]{fdiff/D/Dlog}} can also compute \bold{quasi-differences} of the form \eqn{x_t - \rho x_{t-1}} or \eqn{log(x_t) - \rho log(x_{t-1})} for log-differences. \item \code{\link{fcumsum}} is an S3 generic to efficiently compute cumulative sums on time series and panel data. In contrast to \code{\link{cumsum}}, it can handle missing values and supports both grouped and ordered computations. \item \code{\link{psmat}} is an S3 generic to efficiently convert panel-vectors or \code{plm::pseries} and data frames or \code{plm::pdata.frame}'s to \bold{panel series matrices and 3D arrays}, respectively. \item \code{\link{psacf}}, \code{\link{pspacf}} and \code{\link{psccf}} are S3 generics to compute estimates of the \bold{auto-, partial auto- and cross- correlation or covariance functions} for panel-vectors or \code{plm::pseries}, and multivariate versions for data frames or \code{plm::pdata.frame}'s. } } \section{Table of Functions}{ \tabular{lllll}{\emph{ S3 Generic } \tab\tab \emph{ Methods } \tab\tab \emph{ Description } \cr % \Sexpr{"\u200B"} \Sexpr{"\u200B"} \Sexpr{"\u200B"} \Sexpr{"\u200B"} \code{\link[=flag]{flag/L/F}} \tab\tab \code{default, matrix, data.frame, pseries, pdata.frame, grouped_df} \tab\tab Compute (sequences of) lags and leads \cr \code{\link[=fdiff]{fdiff/D/Dlog}} \tab\tab \code{default, matrix, data.frame, pseries, pdata.frame, grouped_df} \tab\tab Compute (sequences of lagged / leaded and iterated) (quasi-)differences or (quasi-)log-differences \cr \code{\link[=fgrowth]{fgrowth/G}} \tab\tab \code{default, matrix, data.frame, pseries, pdata.frame, grouped_df} \tab\tab Compute (sequences of lagged / leaded and iterated) growth rates (exact or via log-differencing, in percentage terms) \cr \code{\link{fcumsum}} \tab\tab \code{default, matrix, data.frame, pseries, pdata.frame, grouped_df} \tab\tab Compute cumulative sums \cr \code{\link{psmat}} \tab\tab \code{default, pseries, data.frame, pdata.frame} \tab\tab Convert panel data to matrix / array \cr \code{\link{psacf}} \tab\tab \code{default, pseries, data.frame, pdata.frame} \tab\tab Compute ACF on panel data \cr \code{\link{pspacf}} \tab\tab \code{default, pseries, data.frame, pdata.frame} \tab\tab Compute PACF on panel data \cr \code{\link{psccf}} \tab\tab \code{default, pseries, data.frame, pdata.frame} \tab\tab Compute CCF on panel data } } \seealso{ \link[=collapse-documentation]{Collapse Overview}, \link[=data-transformations]{Data Transformations} } \keyword{ts} \keyword{manip} \keyword{documentation} collapse/man/quick-conversion.Rd0000644000176200001440000002057614167146267016445 0ustar liggesusers\name{quick-conversion} \alias{A4-quick-conversion} \alias{quick-conversion} \alias{qDF} \alias{qDT} \alias{qTBL} \alias{qM} \alias{mctl} \alias{mrtl} \alias{as_numeric_factor} \alias{as_character_factor} %- Also NEED an '\alias' for EACH other topic documented here. \title{Quick Data Conversion} \description{ Fast, flexible and precise conversion of common data objects, without method dispatch and extensive checks: \itemize{ \item \code{qDF}, \code{qDT} and \code{qTBL} convert vectors, matrices, higher-dimensional arrays and suitable lists to data frame, \emph{data.table} and \emph{tibble}, respectively. \item \code{qM} converts vectors, higher-dimensional arrays, data frames and suitable lists to matrix. \item \code{mctl} and \code{mrtl} column- or row-wise convert a matrix to list, data frame or \emph{data.table}. They are used internally by \code{qDF} and \code{qDT}, \code{\link{dapply}}, \code{\link{BY}}, etc\dots \item \code{\link{qF}} converts atomic vectors to factor (documented on a separate page). \item \code{as_numeric_factor} and \code{as_character_factor} convert factors, or all factor columns in a data frame / list, to numeric or character (by converting the levels). } } \usage{ # Converting between matrices, data frames / tables / tibbles qDF(X, row.names.col = FALSE, keep.attr = FALSE, class = "data.frame") qDT(X, row.names.col = FALSE, keep.attr = FALSE, class = c("data.table", "data.frame")) qTBL(X, row.names.col = FALSE, keep.attr = FALSE, class = c("tbl_df","tbl","data.frame")) qM(X, keep.attr = FALSE, class = NULL) # Programmer functions: matrix rows or columns to list / DF / DT - fully in C++ mctl(X, names = FALSE, return = "list") mrtl(X, names = FALSE, return = "list") # Converting factors or factor columns as_numeric_factor(X, keep.attr = TRUE) as_character_factor(X, keep.attr = TRUE) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{X}{a vector, factor, matrix, higher-dimensional array, data frame or list. \code{mctl} and \code{mrtl} only accept matrices, \code{as_numeric_factor} and \code{as_character_factor} only accept factors, data frames or lists.} \item{row.names.col}{should a column capturing names or row.names be added? i.e. when converting atomic objects to data frame or data frame to \emph{data.table}. Can be logical \code{TRUE}, which will add a column \code{"row.names"} in front, or can supply a name for the column i.e. \code{"column1"}.} \item{keep.attr}{logical. \code{FALSE} (default) yields a \emph{hard} / \emph{thorough} object conversion: All unnecessary attributes are removed from the object yielding a plain matrix / data.frame / \emph{data.table}. \code{FALSE} yields a \emph{soft} / \emph{minimal} object conversion: Only the attributes 'names', 'row.names', 'dim', 'dimnames' and 'levels' are modified in the conversion. Other attributes are preserved. See also \code{class}.} \item{class}{if a vector of classes is passed here, the converted object will be assigned these classes. If \code{NULL} is passed, the default classes are assigned: \code{qM} assigns no class, \code{qDF} a class \code{"data.frame"}, and \code{qDT} a class \code{c("data.table", "data.frame")}. If \code{keep.attr = TRUE} and \code{class = NULL} and the object already inherits the default classes, further inherited classes are preserved. See Details and the Example. } \item{names}{logical. Should the list be named using row/column names from the matrix?} \item{return}{an integer or string specifying what to return. The options are: \tabular{lllll}{\emph{ Int. } \tab\tab \emph{ String } \tab\tab \emph{ Description } \cr 1 \tab\tab "list" \tab\tab returns a plain list \cr 2 \tab\tab "data.frame" \tab\tab returns a plain data.frame \cr 3 \tab\tab "data.table" \tab\tab returns a plain \emph{data.table} \cr } } } \details{ Object conversions using these functions are maximally efficient and involve 3 consecutive steps: (1) Converting the storage mode / dimensions / data of the object, (2) converting / modifying the attributes and (3) modifying the class of the object: (1) is determined by the choice of function and the optional \code{row.names.col} argument to \code{qDF} and \code{qDT}. Higher-dimensional arrays are converted by expanding the second dimension (adding columns, same as \code{as.matrix, as.data.frame, as.data.table}). (2) is determined by the \code{keep.attr} argument: \code{keep.attr = TRUE} seeks to preserve the attributes of the object. It's effect is like copying \code{attributes(converted) <- attributes(original)}, and then modifying the \code{"dim", "dimnames", "names", "row.names"} and \code{"levels"} attributes as necessitated by the conversion task. \code{keep.attr = FALSE} only converts / assigns / removes these attributes and drops all others. (3) is determined by the \code{class} argument: Setting \code{class = "myclass"} will yield a converted object of class \code{"myclass"}, with any other / prior classes being removed by this replacement. Setting \code{class = NULL} does NOT mean that a class \code{NULL} is assigned (which would remove the class attribute), but rather that the default classes are assigned: \code{qM} assigns no class, \code{qDF} a class \code{"data.frame"}, and \code{qDT} a class \code{c("data.table", "data.frame")}. At this point there is an interaction with \code{keep.attr}: If \code{keep.attr = TRUE} and \code{class = NULL} and the object converted already inherits the respective default classes, then any other inherited classes will also be preserved (with \code{qM(x, keep.attr = TRUE, class = NULL)} any class will be preserved if \code{is.matrix(x)} evaluated to \code{TRUE}.) The default \code{keep.attr = FALSE} ensures \emph{hard} conversions so that all unnecessary attributes are dropped. Furthermore in \code{qDF} and \code{qDT} the default classes were explicitly assigned, thus any other classes (like 'tbl_df', 'tbl', 'pdata.frame', 'sf', 'tsibble' etc.) will be removed when these objects are passed, regardless of the \code{keep.attr} setting. This is to ensure that the default methods for 'data.frame' and 'data.table' can be assumed to work, even if the user chooses to preserve further attributes. For \code{qM} a more lenient default setup was chosen to enable the full preservation of time series matrices with \code{keep.attr = TRUE}. If the user wants to keep attributes attached to a matrix but make sure that all default methods work properly, either one of \code{qM(x, keep.attr = TRUE, class = "matrix")} or \code{unclass(qM(x, keep.attr = TRUE))} should be employed. } \value{ \code{qDF} - returns a data.frame\cr \code{qDT} - returns a \emph{data.table}\cr \code{qTBL} - returns a \emph{tibble}\cr \code{qM} - returns a matrix\cr \code{mctl}, \code{mrtl} - return a list, data frame or \emph{data.table} \cr \code{qF} - returns a factor\cr \code{as_numeric_factor} - returns X with factors converted to numeric variables\cr \code{as_character_factor} - returns X with factors converted to character variables } % \note{ % \code{qTBL} works similarly to \code{qDT} assigning different classes, i.e. \code{qTBL(x)} is equivalent to \code{qDT(x, class = c("tbl_df", "tbl", "data.frame"))}. Similar converters for other data frame based classes are easily created from \code{qDF} and \code{qDT}. The principle difference between them is that \code{qDF} preserves rownames whereas \code{qDT} always assigns integer rownames. % } \seealso{ \code{\link{qF}}, \link[=collapse-documentation]{Collapse Overview} } \examples{ ## Basic Examples mtcarsM <- qM(mtcars) # Matrix from data.frame mtcarsDT <- qDT(mtcarsM) # data.table from matrix columns mtcarsTBL <- qTBL(mtcarsM) # tibble from matrix columns head(mrtl(mtcarsM, TRUE, "data.frame")) # data.frame from matrix rows, etc.. head(qDF(mtcarsM, "cars")) # Adding a row.names column when converting from matrix head(qDT(mtcars, "cars")) # Saving row.names when converting data frame to data.table cylF <- qF(mtcars$cyl) # Factor from atomic vector cylF # Factor to numeric conversions identical(mtcars, as_numeric_factor(dapply(mtcars, qF))) % ## Explaining the interaction of keep.attr and class. Consider the time series EuStockMarkets % plot() } \keyword{manip} \keyword{documentation} collapse/man/radixorder.Rd0000644000176200001440000001031514167156135015272 0ustar liggesusers\name{radixorder} \alias{radixorder} \alias{radixorderv} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Fast Radix-Based Ordering } \description{ A slight modification of \code{\link[=order]{order(..., method = "radix")}} that is more programmer friendly and, importantly, provides features for ordered grouping of data (similar to \code{data.table:::forderv} which has more or less the same source code). \code{radixorderv} is a programmers version directly supporting vector and list input. % Apart from added grouping features, the source code and standard functionality is identical to \code{\link{order(\dots, method = "radix")}. } \usage{ radixorder(\dots, na.last = TRUE, decreasing = FALSE, starts = FALSE, group.sizes = FALSE, sort = TRUE) radixorderv(x, na.last = TRUE, decreasing = FALSE, starts = FALSE, group.sizes = FALSE, sort = TRUE) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{\dots}{comma-separated atomic vectors to order. } \item{x}{ an atomic vector or list of atomic vectors such as a data frame. } \item{na.last}{logical. for controlling the treatment of \code{NA}'s. If \code{TRUE}, missing values in the data are put last; if \code{FALSE}, they are put first; if NA, they are removed. } \item{decreasing}{ logical. Should the sort order be increasing or decreasing? Can be a vector of length equal to the number of arguments in \code{\dots} / \code{x}. } \item{starts}{logical. \code{TRUE} returns an attribute 'starts' containing the first element of each new group i.e. the row denoting the start of each new group if the data were sorted using the computed ordering vector. See Examples. %% ~~Describe \code{starts} here~~ } \item{group.sizes}{logical. \code{TRUE} returns an attribute 'group.sizes' containing sizes of each group in the same order as groups are encountered if the data were sorted using the computed ordering vector. See Examples. } \item{sort}{logical. This argument only affects character vectors / columns passed. If \code{FALSE}, these are not ordered but simply grouped in the order of first appearance of unique elements. This provides a slight performance gain if only grouping but not alphabetic ordering is required. See also \code{\link{group}}. %% ~~Describe \code{sort} here~~ } } % \details{ % \code{radixorder} works just like \code{\link[=order]{order(\dots, method = "radix")}}, the source code is the same. However if \code{starts = TRUE}, and attribute % } %} \value{ An integer ordering vector with attributes: Unless \code{na.last = NA} an attribute 'sorted' indicating whether the input data was already sorted is attached. If \code{starts = TRUE}, 'starts' giving a vector of group starts in the ordered data, and if \code{group.sizes = TRUE}, 'group.sizes' giving the vector of group sizes are attached. In either case an attribute 'maxgrpn' providing the size of the largest group is also attached. } \author{ The C code was taken - with slight modifications, from \href{https://github.com/wch/r-source/blob/79298c499218846d14500255efd622b5021c10ec/src/main/radixsort.c}{base R source code}, and is originally due to \emph{data.table} authors Matt Dowle and Arun Srinivasan. } \seealso{ \link[=fast-grouping-ordering]{Fast Grouping and Ordering}, \link[=collapse-documentation]{Collapse Overview} } \examples{ radixorder(mtcars$mpg) head(mtcars[radixorder(mtcars$mpg), ]) radixorder(mtcars$cyl, mtcars$vs) o <- radixorder(mtcars$cyl, mtcars$vs, starts = TRUE) st <- attr(o, "starts") head(mtcars[o, ]) mtcars[o[st], c("cyl", "vs")] # Unique groups # Note that if attr(o, "sorted") == TRUE, then all(o[st] == st) radixorder(rep(1:3, each = 3), starts = TRUE) # Group sizes radixorder(mtcars$cyl, mtcars$vs, group.sizes = TRUE) # Both radixorder(mtcars$cyl, mtcars$vs, starts = TRUE, group.sizes = TRUE) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory (show via RShowDoc("KEYWORDS")): \keyword{manip} % \keyword{ ~kwd2 } % Use only one keyword per line. % For non-standard keywords, use \concept instead of \keyword: % \concept{ ~cpt1 } % \concept{ ~cpt2 } % Use only one concept per line. collapse/man/colorder.Rd0000644000176200001440000000627014201327077014737 0ustar liggesusers\name{colorder} \alias{colorder} \alias{colorderv} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Fast Reordering of Data Frame Columns } \description{ Efficiently reorder columns in a data frame (no copies). To do this by reference see also \code{data.table::setcolorder}. } \usage{ colorder(.X, \dots, pos = "front") colorderv(X, neworder = radixorder(names(X)), pos = "front", regex = FALSE, \dots) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{.X, X}{a data frame or list.} \item{\dots}{for \code{colorder}: Column names of \code{.X} in the new order (can also use sequences i.e. \code{col1:coln, newname = colk, \dots}). For \code{colorderv}: Further arguments to \code{\link{grep}} if \code{regex = TRUE}.} \item{neworder}{a vector of column names, positive indices, a suitable logical vector, a function such as \code{is.numeric}, or a vector of regular expressions matching column names (if \code{regex = TRUE}). } \item{pos}{integer or character. Different options regarding column arrangement if \code{...length() < ncol(.X)} (or \code{length(neworder) < ncol(X)}). \tabular{lllll}{\emph{ Int. } \tab\tab \emph{ String } \tab\tab \emph{ Description } \cr 1 \tab\tab "front" \tab\tab move specified columns to the front (the default). \cr 2 \tab\tab "end" \tab\tab move specified columns to the end. \cr 3 \tab\tab "exchange" \tab\tab just exchange the positions of selected columns, other columns remain in the same position. \cr 4 \tab\tab "after" \tab\tab place all further selected columns behind the first selected column. \cr } } \item{regex}{logical. \code{TRUE} will do regular expression search on the column names of \code{X} using a (vector of) regular expression(s) passed to \code{neworder}. Matching is done using \code{\link{grep}}. \emph{Note} that multiple regular expressions will be matched in the order they are passed, and \code{\link{funique}} will be applied to the resulting set of indices. } } \value{ \code{.X/X} with columns re-ordered (no deep copy). } %% ~Make other sections like Warning with \section{Warning }{\dots.} ~ \seealso{ \code{\link{roworder}}, \link[=fast-data-manipulation]{Data Frame Manipulation}, \link[=collapse-documentation]{Collapse Overview} } \examples{ head(colorder(mtcars, vs, cyl:hp, am)) head(colorder(mtcars, vs, cyl:hp, am, pos = "end")) head(colorder(mtcars, vs, cyl:hp, am, pos = "after")) head(colorder(mtcars, vs, cyl, pos = "exchange")) head(colorder(mtcars, vs, cyl:hp, new = am)) # renaming ## Same in standard evaluation head(colorderv(mtcars, c(8, 2:4, 9))) head(colorderv(mtcars, c(8, 2:4, 9), pos = "end")) head(colorderv(mtcars, c(8, 2:4, 9), pos = "after")) head(colorderv(mtcars, c(8, 2), pos = "exchange")) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory (show via RShowDoc("KEYWORDS")): \keyword{ manip } % \keyword{ ~kwd2 } % Use only one keyword per line. % For non-standard keywords, use \concept instead of \keyword: % \concept{ ~cpt1 } % \concept{ ~cpt2 } % Use only one concept per line. collapse/man/GRP.Rd0000644000176200001440000004210214201327077013550 0ustar liggesusers\name{GRP} \alias{GRP} \alias{GRP.GRP} \alias{GRP.default} \alias{GRP.factor} \alias{GRP.qG} \alias{GRP.pseries} \alias{GRP.pdata.frame} \alias{GRP.grouped_df} \alias{fgroup_by} \alias{gby} \alias{fgroup_vars} \alias{fungroup} \alias{gsplit} \alias{is_GRP} \alias{length.GRP} \alias{print.GRP} \alias{plot.GRP} \alias{GRPnames} \alias{GRPN} \alias{as_factor_GRP} \title{Fast Grouping / \emph{collapse} Grouping Objects} \description{ \code{GRP} performs fast, ordered and unordered, groupings of vectors and data frames (or lists of vectors) using \code{\link{radixorderv}} or \code{\link{group}}. The output is a list-like object of class 'GRP' which can be printed, plotted and used as an efficient input to all of \emph{collapse}'s fast statistical and transformation functions / operators, as well as to \code{\link{collap}}, \code{\link{BY}} and \code{\link{TRA}}. \code{fgroup_by} is similar to \code{dplyr::group_by} but faster. It creates a grouped data frame with a 'GRP' object attached - for faster dplyr-like programming with \emph{collapse}'s fast functions. There are also several conversion methods to convert to and from 'GRP' objects. Notable among these is \code{GRP.grouped_df}, which returns a 'GRP' object from a grouped data frame created with \code{dplyr::group_by} or \code{fgroup_by}, and the duo \code{GRP.factor} and \code{as_factor_GRP}. \code{gsplit} efficiently splits a vector based on a grouping object. } \usage{ GRP(X, \dots) \method{GRP}{default}(X, by = NULL, sort = TRUE, decreasing = FALSE, na.last = TRUE, return.groups = TRUE, return.order = sort, method = "auto", call = TRUE, \dots) \method{GRP}{factor}(X, \dots, group.sizes = TRUE, drop = FALSE, return.groups = TRUE, call = TRUE) \method{GRP}{qG}(X, \dots, group.sizes = TRUE, return.groups = TRUE, call = TRUE) \method{GRP}{pseries}(X, effect = 1L, \dots, group.sizes = TRUE, return.groups = TRUE, call = TRUE) \method{GRP}{pdata.frame}(X, effect = 1L, \dots, group.sizes = TRUE, return.groups = TRUE, call = TRUE) \method{GRP}{grouped_df}(X, \dots, return.groups = TRUE, call = TRUE) # Identify, get length, group names, and convert GRP object to factor is_GRP(x) \method{length}{GRP}(x) GRPN(x, expand = TRUE, ...) GRPnames(x, force.char = TRUE) as_factor_GRP(x, ordered = FALSE) # Efficiently split a vector using a grouping object gsplit(x, g, use.g.names = FALSE, ...) # Fast, class-agnostic version of dplyr::group_by for use with fast functions, see details fgroup_by(.X, \dots, sort = TRUE, decreasing = FALSE, na.last = TRUE, return.order = sort, method = "auto") # Shortcut for fgroup_by gby(.X, \dots, sort = TRUE, decreasing = FALSE, na.last = TRUE, return.order = sort, method = "auto") # Get grouping columns from a grouped data frame created with dplyr::group_by or fgroup_by fgroup_vars(X, return = "data") # Ungroup grouped data frame created with dplyr::group_by or fgroup_by fungroup(X, \dots) \method{print}{GRP}(x, n = 6, \dots) \method{plot}{GRP}(x, breaks = "auto", type = "s", horizontal = FALSE, \dots) } \arguments{ \item{X}{a vector, list of columns or data frame (default method), or a classed object (conversion / extractor methods).} \item{.X}{a data frame or list.} \item{x, g}{a 'GRP' object. For \code{gsplit}, \code{x} can be a vector of any type, or \code{NULL} to return the integer indices of the groups. Both \code{gsplit/GRPN} also support vectors or data frames to be passed to \code{g/x}.} \item{by}{if \code{X} is a data frame or list, \code{by} can indicate columns to use for the grouping (by default all columns are used). Columns must be passed using a vector of column names, indices, or using a one-sided formula i.e. \code{~ col1 + col2}.} \item{sort}{logical. If \code{FALSE}, groups are not ordered but simply grouped in the order of first appearance of unique elements / rows. This often provides a performance gain if the data was not sorted beforehand. See also \code{method}.} \item{ordered}{logical. \code{TRUE} adds a class 'ordered' i.e. generates an ordered factor.} \item{decreasing}{logical. Should the sort order be increasing or decreasing? Can be a vector of length equal to the number of arguments in \code{X} / \code{by} (argument passed to \code{\link{radixorderv}}).} \item{na.last}{logical. If missing values are encountered in grouping vector/columns, assign them to the last group (argument passed to \code{\link{radixorderv}}).} \item{return.groups}{logical. Include the unique groups in the created GRP object.} \item{return.order}{logical. Include the output from \code{\link{radixorderv}} (or \code{\link{group}}) in the created GRP object. This brings performance improvements in \code{gsplit} if \code{sort = TRUE} (and thus also benefits grouped execution of base R functions), but has a memory cost by making the object larger. } \item{method}{character. The algorithm to use for grouping: either \code{"radix"}, \code{"hash"} or \code{"auto"}. \code{"auto"} will chose \code{"radix"} when \code{sort = TRUE}, yielding ordered grouping via \code{\link{radixorderv}}, and \code{"hash"}-based grouping in first-appearance order via \code{\link{group}} otherwise. It is possibly to put \code{method = "radix"} and \code{sort = FALSE}, which will group character data in first appearance order but sort numeric data (a good hybrid option). \code{method = "hash"} currently does not support any sorting, thus putting \code{sort = TRUE} will simply be ignored.} \item{group.sizes}{logical. \code{TRUE} tabulates factor levels using \code{\link{tabulate}} to create a vector of group sizes; \code{FALSE} leaves that slot empty when converting from factors.} \item{drop}{logical. \code{TRUE} efficiently drops unused factor levels beforehand using \code{\link{fdroplevels}}.} \item{call}{logical. \code{TRUE} calls \code{\link{match.call}} and saves it in the final slot of the GRP object.} \item{expand}{logical. \code{FALSE} returns the group sizes (computed in first-appearance-order of groups if \code{x} is not already a 'GRP' object). \code{TRUE} returns a vector the same length as the data.} \item{force.char}{logical. Always output group names as character vector, even if a single numeric vector was passed to \code{GRP.default}.} \item{effect}{\emph{plm} methods: Select which panel identifier should be used as grouping variable. 1L takes the first variable in the \code{plm::index}, 2L the second etc., identifiers can also be passed as a character string. More than one variable can be supplied. } \item{return}{an integer or string specifying what \code{fgroup_vars} should return. The options are: \tabular{lllll}{\emph{ Int. } \tab\tab \emph{ String } \tab\tab \emph{ Description } \cr 1 \tab\tab "data" \tab\tab full grouping columns (default) \cr 2 \tab\tab "unique" \tab\tab unique rows of grouping columns \cr 3 \tab\tab "names" \tab\tab names of grouping columns \cr 4 \tab\tab "indices" \tab\tab integer indices of grouping columns \cr 5 \tab\tab "named_indices" \tab\tab named integer indices of grouping columns \cr 6 \tab\tab "logical" \tab\tab logical selection vector of grouping columns \cr 7 \tab\tab "named_logical" \tab\tab named logical selection vector of grouping columns \cr } } \item{use.g.names}{logical. \code{TRUE} returns a named list, like \code{\link{split}}. \code{FALSE} is slightly more efficient. } % \item{untibble}{logical. \code{TRUE} also removes classes \code{c("tbl_df", "tbl")} from \code{X}. \code{fgroup_by} attaches an attribute \code{"was.tibble"} indicating if \code{X} was a tibble prior to grouping. The argument thus defaults to \code{TRUE} if this attribute is attached and equal to \code{FALSE}, so that \code{identical(X, X |> fgroup_by(...) |> fungroup())}, regardless of the nature of \code{X}. } \item{n}{integer. Number of groups to print out.} \item{breaks}{integer. Number of breaks in the histogram of group-sizes.} \item{type}{linetype for plot.} \item{horizontal}{logical. \code{TRUE} arranges plots next to each other, instead of above each other.} \item{\dots}{for \code{fgroup_by}: unquoted comma-separated column names, sequences of columns, expressions involving columns, and column names, indices, logical vectors or selector functions. See Examples. For \code{gsplit} and \code{GRPN}: further arguments passed to \code{GRP} (if \code{g/x} is not already a 'GRP' object). For example the \code{by} argument could be used if a data frame is passed.} } \details{ \code{GRP} is a central function in the \emph{collapse} package because it provides the key inputs to facilitate easy and efficient groupwise-programming at the \code{C/C++} level: Information about (1) the number of groups (2) an integer group-id indicating which values / rows belong to which group and (3) information about the size of each group. Provided with these informations, \emph{collapse}'s \link[=fast-statistical-functions]{Fast Statistical Functions} pre-allocate intermediate and result vectors of the right sizes and (in most cases) perform grouped statistical computations in a single pass through the data. The sorting and ordering functionality for \code{GRP} only affects (2), that is groups receive different integer-id's depending on whether the groups are sorted \code{sort = TRUE}, and in which order (argument \code{decreasing}). This in-turn changes the order of values/rows in the output of \emph{collapse} functions. % \emph{Note} that \code{sort = FALSE} is only effective on character vectors, numeric grouping vectors will always produce ordered groupings. %This in-turn changes the order of values/rows in the output of \emph{collapse} functions (the row/value corresponding to group 1 always comes out on top). The default setting with \code{sort = TRUE} and \code{decreasing = FALSE} results in groups being sorted in ascending order. This is equivalent to performing grouped operations in \emph{data.table} using \code{keyby}, whereas \code{sort = FALSE} is equivalent to \emph{data.table} grouping with \code{by}, however this only works if the \code{by} columns are character, numeric grouping columns are always ordered. Next to \code{GRP}, there is the function \code{fgroup_by} as a significantly faster alternative to \code{dplyr::group_by}. It creates a grouped data frame by attaching a 'GRP' object to a data frame. \emph{collapse} functions with a grouped_df method applied to that data frame will yield grouped computations. Note that \code{fgroup_by} can only be used in combination with \emph{collapse} functions, not with \code{dplyr::summarize} or \code{dplyr::mutate} (the grouping object and method of computing results is different). The converse is not true, you can group data with \code{dplyr::group_by} and then apply \emph{collapse} functions. \code{fgroup_by} is class-agnostic, i.e. the classes of the data frame or list passed are preserved, and all standard methods (like subsetting with \code{`[`} or \code{print} methods) apply to the grouped object. Apart from the class 'grouped_df' which is added behind any classes the object might inherit (apart from 'data.frame'), a class 'GRP_df' is added in front. This class responds to \code{print} method and subset (\code{`[`}) methods. Both first call the corresponding method for the object and then print / attach the grouping information. \code{print.GRP_df} prints one line below the object indicating the grouping variables, followed, in square brackets, by some statistics on the group sizes: \code{[N | Mean (SD) Min-Max]}. The mean is rounded to a full number and the standard deviation (SD) to one digit. Minimum and maximum are only displayed if the SD is non-zero. \code{GRP} is an S3 generic function with one default method supporting vector and list input and several conversion methods: The conversion of factors to 'GRP' objects by \code{GRP.factor} involves obtaining the number of groups calling \code{ng <- fnlevels(f)} and then computing the count of each level using \code{\link[=tabulate]{tabulate(f, ng)}}. The integer group-id (2) is already given by the factor itself after removing the levels and class attributes and replacing any missing values with \code{ng + 1L}. The levels are put in a list and moved to position (4) in the 'GRP' object, which is reserved for the unique groups. Going from factor to 'GRP' object thus only requires a tabulation of the levels, whereas creating a factor from a 'GRP' object using \code{as_factor_GRP} does not involve any computations, but may involve interacting multiple columns using the \code{paste} function to produce unique factor levels (if multiple grouping columns were used). % or \code{\link{as.character}} conversions if the grouping column(s) were numeric (which are potentially expensive). The method \code{GRP.grouped_df} takes the 'groups' attribute from a grouped data frame and converts it to a 'GRP' object. If the grouped data frame was generated using \code{fgroup_by}, all work is done already. If it was created using \code{dplyr::group_by}, a C routine is called to efficiently convert the grouping object. \emph{Note}: For faster factor generation and a factor-light class 'qG' which avoids the coercion of factor levels to character also see \code{\link{qF}} and \code{\link{qG}}. } \value{ A list-like object of class `GRP' containing information about the number of groups, the observations (rows) belonging to each group, the size of each group, the unique group names / definitions, whether the groups are ordered or not and the ordering vector used to perform the ordering. The object is structured as follows: \tabular{lllllll}{\emph{ List-index } \tab\tab \emph{ Element-name } \tab\tab \emph{ Content type } \tab\tab \emph{ Content description} \cr [[1]] \tab\tab N.groups \tab\tab \code{integer(1)} \tab\tab Number of Groups \cr [[2]] \tab\tab group.id \tab\tab \code{integer(NROW(X))} \tab\tab An integer group-identifier \cr [[3]] \tab\tab group.sizes \tab\tab \code{integer(N.groups)} \tab\tab Vector of group sizes \cr [[4]] \tab\tab groups \tab\tab \code{unique(X)} or \code{NULL} \tab\tab Unique groups (same format as input, except for \code{fgroup_by} which uses a plain list, sorted if \code{sort = TRUE}), or \code{NULL} if \code{return.groups = FALSE} \cr [[5]] \tab\tab group.vars \tab\tab \code{character} \tab\tab The names of the grouping variables \cr [[6]] \tab\tab ordered \tab\tab \code{logical(2)} \tab\tab \code{[1]- TRUE} if \code{sort = TRUE}, \code{[2]- TRUE} if \code{X} already sorted \cr [[7]] \tab\tab order \tab\tab \code{integer(NROW(X))} or \code{integer(0)} (with attributes), or \code{NULL} \tab\tab Ordering vector from \code{radixorderv} or \code{group} (with \code{"starts"} attribute) or \code{NULL} if \code{return.order = FALSE} \cr [[8]] \tab\tab call \tab\tab \code{match.call()} or \code{NULL} \tab\tab The \code{GRP()} call, obtained from \code{match.call()}, or \code{NULL} if \code{call = FALSE} } } \seealso{ \code{\link{radixorder}}, \code{\link{qF}}, \link[=fast-grouping-ordering]{Fast Grouping and Ordering}, \link[=collapse-documentation]{Collapse Overview} } \examples{ ## default method GRP(mtcars$cyl) GRP(mtcars, ~ cyl + vs + am) # Or GRP(mtcars, c("cyl","vs","am")) or GRP(mtcars, c(2,8:9)) g <- GRP(mtcars, ~ cyl + vs + am) # Saving the object print(g) # Printing it plot(g) # Plotting it GRPnames(g) # Retain group names fsum(mtcars, g) # Compute the sum of mtcars, grouped by variables cyl, vs and am gsplit(mtcars$mpg, g) # Use the object to split a vector gsplit(NULL, g) # The indices of the groups ## Convert factor to GRP object and vice-versa GRP(iris$Species) as_factor_GRP(g) \donttest{ % No code relying on suggested package ## dplyr integration library(dplyr) mtcars \%>\% group_by(cyl,vs,am) \%>\% GRP() # Get GRP object from a dplyr grouped tibble mtcars \%>\% group_by(cyl,vs,am) \%>\% fmean() # Grouped mean using dplyr grouping mtcars \%>\% fgroup_by(cyl,vs,am) \%>\% fmean() # Faster alternative with collapse grouping mtcars \%>\% fgroup_by(cyl,vs,am) # Print method for grouped data frame } library(magrittr) ## Adding a column of group sizes mtcars \%>\% fgroup_by(cyl,vs,am) \%>\% ftransform(Sizes = GRPN(.)) mtcars \%>\% ftransform(Sizes = GRPN(list(cyl,vs,am))) # Same thing, slightly more efficient ## Various options for programming and interactive use fgroup_by(GGDC10S, Variable, Decade = floor(Year / 10) * 10) \%>\% head(3) fgroup_by(GGDC10S, 1:3, 5) \%>\% head(3) fgroup_by(GGDC10S, c("Variable", "Country")) \%>\% head(3) fgroup_by(GGDC10S, is.character) \%>\% head(3) fgroup_by(GGDC10S, Country:Variable, Year) \%>\% head(3) fgroup_by(GGDC10S, Country:Region, Var = Variable, Year) \%>\% head(3) } \keyword{manip} collapse/man/BY.Rd0000644000176200001440000002034514201327077013437 0ustar liggesusers\name{BY} \alias{BY} \alias{BY.default} \alias{BY.matrix} \alias{BY.data.frame} \alias{BY.grouped_df} \title{ Split-Apply-Combine Computing % (Efficient) } \description{ \code{BY} is an S3 generic that efficiently applies functions over vectors or matrix- and data frame columns by groups. Similar to \code{\link{dapply}} it seeks to retain the structure and attributes of the data, but can also output to various standard formats. A simple parallelism is also available. } \usage{ BY(x, \dots) \method{BY}{default}(x, g, FUN, \dots, use.g.names = TRUE, sort = TRUE, expand.wide = FALSE, parallel = FALSE, mc.cores = 1L, return = c("same", "vector", "list")) \method{BY}{matrix}(x, g, FUN, \dots, use.g.names = TRUE, sort = TRUE, expand.wide = FALSE, parallel = FALSE, mc.cores = 1L, return = c("same", "matrix", "data.frame", "list")) \method{BY}{data.frame}(x, g, FUN, \dots, use.g.names = TRUE, sort = TRUE, expand.wide = FALSE, parallel = FALSE, mc.cores = 1L, return = c("same", "matrix", "data.frame", "list")) \method{BY}{grouped_df}(x, FUN, \dots, keep.group_vars = TRUE, use.g.names = FALSE) } \arguments{ \item{x}{a atomic vector, matrix, data frame or alike object.} \item{g}{a \code{\link{GRP}} object, or a factor / atomic vector / list of atomic vectors (internally converted to a \code{\link{GRP}} object) used to group \code{x}.} \item{FUN}{a function, can be scalar- or vector-valued. For vector valued functions see \code{expand.wide} and the Note.} \item{\dots}{further arguments to \code{FUN}, or to \code{BY.data.frame} for the 'grouped_df' method.} \item{use.g.names}{logical. Make group-names and add to the result as names (default method) or row-names (matrix and data frame methods). No row-names are generated for \emph{data.table}'s.} \item{sort}{logical. Sort the groups? Internally passed to \code{\link{GRP}}, and only effective if \code{g} is not already a factor or \code{\link{GRP}} object.} \item{expand.wide}{logical. If \code{FUN} is a vector-valued function returning a vector of fixed length > 1 (such as the \code{\link{quantile}} function), \code{expand.wide} can be used to return the result in a wider format (instead of stacking the resulting vectors of fixed length above each other in each output column).} \item{parallel}{logical. \code{TRUE} implements simple parallel execution by internally calling \code{\link{mclapply}} instead of \code{\link{lapply}}.} \item{mc.cores}{integer. Argument to \code{\link{mclapply}} indicating the number of cores to use for parallel execution. Can use \code{\link[=detectCores]{detectCores()}} to select all available cores.} \item{return}{an integer or string indicating the type of object to return. The default \code{1 - "same"} returns the same object type (i.e. class and other attributes are retained if the underlying data type is the same, just the names for the dimensions are adjusted). \code{2 - "matrix"} always returns the output as matrix, \code{3 - "data.frame"} always returns a data frame and \code{4 - "list"} returns the raw (uncombined) output. \emph{Note}: \code{4 - "list"} works together with \code{expand.wide} to return a list of matrices.} \item{keep.group_vars}{\emph{grouped_df method:} Logical. \code{FALSE} removes grouping variables after computation. See also the Note. } % \item{simplify}{logical. Simplify the result to return an object of the same class and with the same attributes. If \code{FALSE}, the raw computation retult in form of a (nested) list is returned.} } \details{ \code{BY} is a frugal re-implementation of the Split-Apply-Combine computing paradigm. It is faster than \code{\link{tapply}}, \code{\link{by}}, \code{\link{aggregate}} and \emph{plyr}, and preserves data attributes just like \code{\link{dapply}}. % and more versatile though not faster than \emph{dplyr} %I note at this point that the philosophy of \emph{collapse} is to move beyond this rather slow computing paradigm, which is why the \link[=fast-statistical-functions]{Fast Statistical Functions} were implemented. However sometimes tasks need to be performed that involve more complex and customized operations on data, and for these cases \code{BY} is a good solution. It is principally a wrapper around \code{lapply(gsplit(x, g), FUN, \dots)}, that uses \code{\link{gsplit}} for optimized splitting and also strongly optimizes on the internal code compared to \emph{base} R functions. For more details look at the documentation for \code{\link{dapply}} which works very similar (apart from the splitting performed in \code{BY}). The function is intended for simple cases involving data aggregation or flexible computation of summary statistics across groups using a single function e.g. \code{iris |> gby(Species) |> BY(IQR)} is simpler than \code{iris |> gby(Species) |> smr(acr(.fns = IQR))} etc.. For larger tasks, the \link[=fast-statistical-functions]{Fast Statistical Functions} or the \emph{data.table} package are more appropriate tools. } \note{ \code{BY} can be used with vector-valued functions preserving the length of the data, note however that, unlike \code{\link{fmutate}}, data is recombined in the order of the groups, not in the order of the original data. It is thus advisable to sort the data by the grouping variable before using \code{BY} with such a function. In particular, in such cases the 'grouped_df' method only keeps grouping columns if data was grouped with \code{fgroup_by(data, ..., sort = TRUE)}, and the grouping algorithm detected that the data is already sorted in the order of the groups (i.e. if \code{attr(with(data, radixorder(...)), "sorted")} is \code{TRUE}), even if \code{keep.group_vars = TRUE}. The same holds for preservation names / rownames in the default, matrix or data frame methods. Basically, \code{BY} is kept as simple as possible without running danger of returning something wrong. } \value{ \code{X} where \code{FUN} was applied to every column split by \code{g}. } \seealso{ \code{\link{dapply}}, \code{\link{collap}}, \link[=fast-statistical-functions]{Fast Statistical Functions}, \link[=data-transformations]{Data Transformations}, \link[=collapse-documentation]{Collapse Overview} } \examples{ v <- iris$Sepal.Length # A numeric vector f <- GRP(iris$Species) # A grouping ## default vector method BY(v, f, sum) # Sum by species head(BY(v, f, scale)) # Scale by species (please use fscale instead) head(BY(v, f, scale, use.g.names = FALSE)) # Omitting auto-generated names BY(v, f, quantile) # Species quantiles: by default stacked BY(v, f, quantile, expand.wide = TRUE) # Wide format ## matrix method m <- qM(num_vars(iris)) BY(m, f, sum) # Also return as matrix BY(m, f, sum, return = "data.frame") # Return as data.frame.. also works for computations below head(BY(m, f, scale)) head(BY(m, f, scale, use.g.names = FALSE)) BY(m, f, quantile) BY(m, f, quantile, expand.wide = TRUE) BY(m, f, quantile, expand.wide = TRUE, # Return as list of matrices return = "list") ## data.frame method BY(num_vars(iris), f, sum) # Also returns a data.fram BY(num_vars(iris), f, sum, return = 2) # Return as matrix.. also works for computations below head(BY(num_vars(iris), f, scale)) head(BY(num_vars(iris), f, scale, use.g.names = FALSE)) BY(num_vars(iris), f, quantile) BY(num_vars(iris), f, quantile, expand.wide = TRUE) BY(num_vars(iris), f, quantile, # Return as list of matrices expand.wide = TRUE, return = "list") % No code relying on suggested package ## grouped data frame method library(magrittr) # Note: Used because |> is not available on older R versions giris <- fgroup_by(iris, Species) giris \%>\% BY(sum) # Compute sum giris \%>\% BY(sum, use.g.names = TRUE, # Use row.names and keep.group_vars = FALSE) # remove 'Species' and groups attribute giris \%>\% BY(sum, return = "matrix") # Return matrix giris \%>\% BY(sum, return = "matrix", # Matrix with row.names use.g.names = TRUE) giris \%>\% BY(quantile) # Compute quantiles (output is stacked) giris \%>\% BY(quantile, # Much better, also keeps 'Species' expand.wide = TRUE) } \keyword{manip} collapse/man/summary-statistics.Rd0000644000176200001440000000640614167155773017031 0ustar liggesusers\name{summary-statistics} % \name{Time Series and Panel Computations} \alias{A9-summary-statistics} \alias{summary-statistics} % \alias{tscomp} \title{Summary Statistics} % \emph{collapse} \description{ \emph{collapse} provides the following functions to efficiently summarize and examine data: \itemize{ \item \code{\link{qsu}}, shorthand for quick-summary, is an extremely fast summary command inspired by the (xt)summarize command in the STATA statistical software. It computes a set of 7 statistics (nobs, mean, sd, min, max, skewness and kurtosis) using a numerically stable one-pass method. Statistics can be computed weighted, by groups, and also within-and between entities (for multilevel / panel data). \item \code{\link{descr}} computes a concise and detailed description of a data frame, including frequency tables for categorical variables and various statistics and quantiles for numeric variables. It is inspired by \code{Hmisc::describe}, but about 10x faster. \item \code{\link{pwcor}}, \code{\link{pwcov}} and \code{\link{pwnobs}} compute (weighted) pairwise correlations, covariances and observation counts on matrices and data frames. Pairwise correlations and covariances can be computed together with observation counts and p-values, and output as 3D array (default) or list of matrices. A major feature of \code{pwcor} and \code{pwcov} is the print method displaying all of these statistics in a single correlation table. \item \code{\link{varying}} very efficiently checks for the presence of any variation in data (optionally) within groups (such as panel-identifiers). % \item \code{\link{fFtest}} is a fast implementation of the R-Squared based F-test, to test \bold{exclusion restrictions} in linear models potentially involving multiple large factors (fixed effects). It internally utilizes \code{\link{fhdwithin}} to project out factors while counting the degrees of freedom. } } \section{Table of Functions}{ \tabular{lllll}{\emph{ Function / S3 Generic } \tab\tab \emph{ Methods } \tab\tab \emph{ Description } \cr \code{\link{qsu}} \tab\tab \code{default, matrix, data.frame, pseries, pdata.frame} \tab\tab Fast (grouped, weighted, panel-decomposed) summary statistics \cr \code{\link{descr}} \tab\tab No methods, for data frames or lists of vectors \tab\tab Detailed statistical description of data frame \cr \code{\link{pwcor}} \tab\tab No methods, for matrices or data frames \tab\tab Pairwise correlations \cr \code{\link{pwcov}} \tab\tab No methods, for matrices or data frames \tab\tab Pairwise covariances \cr \code{\link{pwnobs}} \tab\tab No methods, for matrices or data frames \tab\tab Pairwise observation counts \cr \code{\link{varying}} \tab\tab \code{default, matrix, data.frame, pseries, pdata.frame, grouped_df} \tab\tab Fast variation check \cr % \code{\link{fFtest}} \tab\tab No methods, it's a standalone test to which data needs to be supplied. \tab\tab Fast F-test of exclusion restrictions in linear models (with factors variables) \cr } } \seealso{ \link[=collapse-documentation]{Collapse Overview}, \link[=fast-statistical-functions]{Fast Statistical Functions} } \keyword{manip} \keyword{documentation} collapse/man/fsum.Rd0000644000176200001440000001715114175334642014106 0ustar liggesusers\name{fsum} \alias{fsum} \alias{fsum.default} \alias{fsum.matrix} \alias{fsum.data.frame} \alias{fsum.grouped_df} \title{Fast (Grouped, Weighted) Sum for Matrix-Like Objects} % Vectors, Matrix and Data Frame Columns} \description{ \code{fsum} is a generic function that computes the (column-wise) sum of all values in \code{x}, (optionally) grouped by \code{g} and/or weighted by \code{w} (e.g., to calculate survey totals). The \code{\link{TRA}} argument can further be used to transform \code{x} using its (grouped, weighted) sum. } \usage{ fsum(x, \dots) \method{fsum}{default}(x, g = NULL, w = NULL, TRA = NULL, na.rm = TRUE, use.g.names = TRUE, \dots) \method{fsum}{matrix}(x, g = NULL, w = NULL, TRA = NULL, na.rm = TRUE, use.g.names = TRUE, drop = TRUE, \dots) \method{fsum}{data.frame}(x, g = NULL, w = NULL, TRA = NULL, na.rm = TRUE, use.g.names = TRUE, drop = TRUE, \dots) \method{fsum}{grouped_df}(x, w = NULL, TRA = NULL, na.rm = TRUE, use.g.names = FALSE, keep.group_vars = TRUE, keep.w = TRUE, \dots) } \arguments{ \item{x}{a numeric vector, matrix, data frame or grouped data frame (class 'grouped_df').} \item{g}{a factor, \code{GRP} object, atomic vector (internally converted to factor) or a list of vectors / factors (internally converted to a \code{GRP} object) used to group \code{x}.} \item{w}{a numeric vector of (non-negative) weights, may contain missing values.} \item{TRA}{an integer or quoted operator indicating the transformation to perform: 1 - "replace_fill" | 2 - "replace" | 3 - "-" | 4 - "-+" | 5 - "/" | 6 - "\%" | 7 - "+" | 8 - "*" | 9 - "\%\%" | 10 - "-\%\%". See \code{\link{TRA}}.} \item{na.rm}{logical. Skip missing values in \code{x}. Defaults to \code{TRUE} and implemented at very little computational cost. If \code{na.rm = FALSE} a \code{NA} is returned when encountered.} \item{use.g.names}{logical. Make group-names and add to the result as names (default method) or row-names (matrix and data frame methods). No row-names are generated for \emph{data.table}'s.} \item{drop}{\emph{matrix and data.frame method:} Logical. \code{TRUE} drops dimensions and returns an atomic vector if \code{g = NULL} and \code{TRA = NULL}.} \item{keep.group_vars}{\emph{grouped_df method:} Logical. \code{FALSE} removes grouping variables after computation.} \item{keep.w}{\emph{grouped_df method:} Logical. Retain summed weighting variable after computation (if contained in \code{grouped_df}).} \item{\dots}{arguments to be passed to or from other methods.} } \details{ % Non-grouped sum computations internally utilize long-doubles in C++, for additional numeric precision. Missing-value removal as controlled by the \code{na.rm} argument is done very efficiently by simply skipping them in the computation (thus setting \code{na.rm = FALSE} on data with no missing values doesn't give extra speed). Large performance gains can nevertheless be achieved in the presence of missing values if \code{na.rm = FALSE}, since then the corresponding computation is terminated once a \code{NA} is encountered and \code{NA} is returned (unlike \code{\link{sum}} which just runs through without any checks). The weighted sum (e.g., survey total) is computed as \code{sum(x * w)}, but in one pass and about twice as efficient. If \code{na.rm = TRUE}, missing values will be removed from both \code{x} and \code{w} i.e. utilizing only \code{x[complete.cases(x,w)]} and \code{w[complete.cases(x,w)]}. This all seamlessly generalizes to grouped computations, which are performed in a single pass (without splitting the data) and are therefore extremely fast. See Benchmark and Examples below. When applied to data frames with groups or \code{drop = FALSE}, \code{fsum} preserves all column attributes (such as variable labels), unless columns have a class (checked using \code{is.object}). The attributes of the data frame itself are also preserved. Since v1.6.0 \code{fsum} explicitly supports integers. Integers are summed using the long long type in C which is bounded at +-9,223,372,036,854,775,807 (so ~4.3 billion times greater than the minimum/maximum R integer bounded at +-2,147,483,647). If the value of the sum is outside +-2,147,483,647, a double containing the result is returned, otherwise an integer is returned. With groups, an integer overflow error is provided if the sum in any group is outside +-2,147,483,647. } \value{ The (\code{w} weighted) sum of \code{x}, grouped by \code{g}, or (if \code{\link{TRA}} is used) \code{x} transformed by its sum, grouped by \code{g}. } \section{See Also}{ \code{\link{fprod}}, \code{\link{fmean}}, \link[=fast-statistical-functions]{Fast Statistical Functions}, \link[=collapse-documentation]{Collapse Overview} } \section{Examples}{\preformatted{ ## default vector method mpg <- mtcars$mpg fsum(mpg) # Simple sum fsum(mpg, w = mtcars$hp) # Weighted sum (total): Weighted by hp fsum(mpg, TRA = "\%") # Simple transformation: obtain percentages of mpg fsum(mpg, mtcars$cyl) # Grouped sum fsum(mpg, mtcars$cyl, mtcars$hp) # Weighted grouped sum (total) fsum(mpg, mtcars[c(2,8:9)]) # More groups.. g <- GRP(mtcars, ~ cyl + vs + am) # Precomputing groups gives more speed ! fsum(mpg, g) fmean(mpg, g) == fsum(mpg, g) / fnobs(mpg, g) fsum(mpg, g, TRA = "\%") # Percentages by group ## data.frame method fsum(mtcars) fsum(mtcars, TRA = "\%") fsum(mtcars, g) fsum(mtcars, g, TRA = "\%") ## matrix method m <- qM(mtcars) fsum(m) fsum(m, TRA = "\%") fsum(m, g) fsum(m, g, TRA = "\%") \donttest{ % No code relying on suggested package ## method for grouped data frames - created with dplyr::group_by or fgroup_by library(dplyr) mtcars \%>\% group_by(cyl,vs,am) \%>\% fsum(hp) # Weighted grouped sum (total) mtcars \%>\% fgroup_by(cyl,vs,am) \%>\% fsum(hp) # Equivalent and faster !! mtcars \%>\% fgroup_by(cyl,vs,am) \%>\% fsum(TRA = "\%") mtcars \%>\% fgroup_by(cyl,vs,am) \%>\% fselect(mpg) \%>\% fsum() } } } \section{Benchmark}{\preformatted{ ## This compares fsum with data.table (2 threads) and base::rowsum # Starting with small data mtcDT <- qDT(mtcars) f <- qF(mtcars$cyl) library(microbenchmark) microbenchmark(mtcDT[, lapply(.SD, sum), by = f], rowsum(mtcDT, f, reorder = FALSE), fsum(mtcDT, f, na.rm = FALSE), unit = "relative") expr min lq mean median uq max neval cld mtcDT[, lapply(.SD, sum), by = f] 145.436928 123.542134 88.681111 98.336378 71.880479 85.217726 100 c rowsum(mtcDT, f, reorder = FALSE) 2.833333 2.798203 2.489064 2.937889 2.425724 2.181173 100 b fsum(mtcDT, f, na.rm = FALSE) 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 100 a # Now larger data tdata <- qDT(replicate(100, rnorm(1e5), simplify = FALSE)) # 100 columns with 100.000 obs f <- qF(sample.int(1e4, 1e5, TRUE)) # A factor with 10.000 groups microbenchmark(tdata[, lapply(.SD, sum), by = f], rowsum(tdata, f, reorder = FALSE), fsum(tdata, f, na.rm = FALSE), unit = "relative") expr min lq mean median uq max neval cld tdata[, lapply(.SD, sum), by = f] 2.646992 2.975489 2.834771 3.081313 3.120070 1.2766475 100 c rowsum(tdata, f, reorder = FALSE) 1.747567 1.753313 1.629036 1.758043 1.839348 0.2720937 100 b fsum(tdata, f, na.rm = FALSE) 1.000000 1.000000 1.000000 1.000000 1.000000 1.0000000 100 a }} \keyword{univar} \keyword{manip} collapse/man/fnobs.Rd0000644000176200001440000000700714176642305014241 0ustar liggesusers\name{fnobs} \alias{fnobs} \alias{fnobs.default} \alias{fnobs.matrix} \alias{fnobs.data.frame} \alias{fnobs.grouped_df} \title{Fast (Grouped) Observation Count for Matrix-Like Objects} % Vectors, Matrix and Data Frame Columns} \description{ \code{fnobs} is a generic function that (column-wise) computes the number of non-missing values in \code{x}, (optionally) grouped by \code{g}. It is much faster than \code{sum(!is.na(x))}. The \code{\link{TRA}} argument can further be used to transform \code{x} using its (grouped) observation count. } \usage{ fnobs(x, \dots) \method{fnobs}{default}(x, g = NULL, TRA = NULL, use.g.names = TRUE, \dots) \method{fnobs}{matrix}(x, g = NULL, TRA = NULL, use.g.names = TRUE, drop = TRUE, \dots) \method{fnobs}{data.frame}(x, g = NULL, TRA = NULL, use.g.names = TRUE, drop = TRUE, \dots) \method{fnobs}{grouped_df}(x, TRA = NULL, use.g.names = FALSE, keep.group_vars = TRUE, \dots) } \arguments{ \item{x}{a vector, matrix, data frame or grouped data frame (class 'grouped_df').} \item{g}{a factor, \code{\link{GRP}} object, atomic vector (internally converted to factor) or a list of vectors / factors (internally converted to a \code{\link{GRP}} object) used to group \code{x}.} \item{TRA}{an integer or quoted operator indicating the transformation to perform: 1 - "replace_fill" | 2 - "replace" | 3 - "-" | 4 - "-+" | 5 - "/" | 6 - "\%" | 7 - "+" | 8 - "*" | 9 - "\%\%" | 10 - "-\%\%". See \code{\link{TRA}}.} \item{use.g.names}{logical. Make group-names and add to the result as names (default method) or row-names (matrix and data frame methods). No row-names are generated for \emph{data.table}'s.} \item{drop}{\emph{matrix and data.frame method:} Logical. \code{TRUE} drops dimensions and returns an atomic vector if \code{g = NULL} and \code{TRA = NULL}.} \item{keep.group_vars}{\emph{grouped_df method:} Logical. \code{FALSE} removes grouping variables after computation.} \item{\dots}{arguments to be passed to or from other methods.} } \details{ \code{fnobs} preserves all attributes of non-classed vectors / columns, and only the 'label' attribute (if available) of classed vectors / columns (i.e. dates or factors). When applied to data frames and matrices, the row-names are adjusted as necessary. } \value{ Integer. The number of non-missing observations in \code{x}, grouped by \code{g}, or (if \code{\link{TRA}} is used) \code{x} transformed by its number of non-missing observations, grouped by \code{g}. } \seealso{ \code{\link{fndistinct}}, \link[=fast-statistical-functions]{Fast Statistical Functions}, \link[=collapse-documentation]{Collapse Overview} } \examples{ ## default vector method fnobs(airquality$Solar.R) # Simple Nobs fnobs(airquality$Solar.R, airquality$Month) # Grouped Nobs ## data.frame method fnobs(airquality) fnobs(airquality, airquality$Month) fnobs(wlddev) # Works with data of all types! head(fnobs(wlddev, wlddev$iso3c)) ## matrix method aqm <- qM(airquality) fnobs(aqm) # Also works for character or logical matrices fnobs(aqm, airquality$Month) \donttest{ % No code relying on suggested package ## method for grouped data frames - created with dplyr::group_by or fgroup_by library(dplyr) airquality \%>\% group_by(Month) \%>\% fnobs() wlddev \%>\% group_by(country) \%>\% select(PCGDP,LIFEEX,GINI,ODA) \%>\% fnobs() } } \keyword{univar} \keyword{manip} collapse/man/frename.Rd0000644000176200001440000000611714170125353014541 0ustar liggesusers\name{frename} \alias{rnm} \alias{frename} \alias{setrename} \alias{relabel} \alias{setrelabel} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Fast Renaming and Relabelling Objects } \description{ A fast substitute for \code{dplyr::rename}. \code{setrename} renames objects by reference. These functions also work with objects other than data frames that have a 'names' attribute. \code{relabel} and \code{setrelabel} do that same for labels attached to list elements / data frame columns. } \usage{ frename(.x, \dots, cols = NULL) rnm(.x, \dots, cols = NULL) # Shortcut for frename() setrename(.x, \dots, cols = NULL) relabel(.x, \dots, cols = NULL, attrn = "label") setrelabel(.x, \dots, cols = NULL, attrn = "label") } %- maybe also 'usage' for other objects documented here. \arguments{ \item{.x}{an R object with a 'names' attribute. } \item{\dots}{either tagged vector expressions of the form \code{name = newname} / \code{name = newlabel}, or a single function (+ optional arguments to the function) applied to all names/labels (of columns/elements selected in \code{cols}). } \item{cols}{If \code{\dots} is a function, select a subset of columns/elements to rename/relabel using names, indices, a logical vector or a function applied to the columns if \code{.x} is a data frame (e.g. \code{is.numeric}).} \item{attrn}{character. Name of attribute to store labels or retrieve labels from.} } \value{ \code{.x} renamed / relabelled. \code{setrename} and \code{setrelabel} return \code{.x} invisibly. } \note{ Note that both \code{relabel} and \code{setrelabel} modify \code{.x} by reference. This is because labels are attached to columns themselves, making it impossible to avoid permanent modification by taking a shallow copy of the encompassing list / data.frame. On the other hand \code{frename} makes a shallow copy whereas \code{setrename} also modifies by reference. } \seealso{ \link[=fast-data-manipulation]{Data Frame Manipulation}, \link[=collapse-documentation]{Collapse Overview} } \examples{ ## Using tagged expressions head(frename(iris, Sepal.Length = SL, Sepal.Width = SW, Petal.Length = PL, Petal.Width = PW)) head(frename(iris, Sepal.Length = "S L", Sepal.Width = "S W", Petal.Length = "P L", Petal.Width = "P W")) ## Using a function head(frename(iris, tolower)) head(frename(iris, tolower, cols = 1:2)) head(frename(iris, tolower, cols = is.numeric)) head(frename(iris, paste, "new", sep = "_", cols = 1:2)) ## Renaming by reference # setrename(iris, tolower) # head(iris) # rm(iris) ## Relabelling (by reference) # namlab(relabel(wlddev, PCGDP = "GDP per Capita", LIFEEX = "Life Expectancy")) # namlab(relabel(wlddev, toupper)) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory (show via RShowDoc("KEYWORDS")): \keyword{ manip } % \keyword{ ~kwd2 } % Use only one keyword per line. % For non-standard keywords, use \concept instead of \keyword: % \concept{ ~cpt1 } % \concept{ ~cpt2 } % Use only one concept per line. collapse/man/flag.Rd0000644000176200001440000003510014175334642014037 0ustar liggesusers\name{flag} \alias{flag} \alias{flag.default} \alias{flag.matrix} \alias{flag.data.frame} \alias{flag.pseries} \alias{flag.pdata.frame} \alias{flag.grouped_df} \alias{L} \alias{L.default} \alias{L.matrix} \alias{L.data.frame} \alias{L.pseries} \alias{L.pdata.frame} \alias{L.grouped_df} \alias{F} \alias{F.default} \alias{F.matrix} \alias{F.data.frame} \alias{F.pseries} \alias{F.pdata.frame} \alias{F.grouped_df} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Fast Lags and Leads for Time Series and Panel Data } \description{ \code{flag} is an S3 generic to compute (sequences of) lags and leads. \code{L} and \code{F} are wrappers around \code{flag} representing the lag- and lead-operators, such that \code{L(x,-1) = F(x,1) = F(x)} and \code{L(x,-3:3) = F(x,3:-3)}. \code{L} and \code{F} provide more flexibility than \code{flag} when applied to data frames (i.e. column subsetting, formula input and id-variable-preservation capabilities\dots), but are otherwise identical. (\code{flag} is more of a programmers function in style of the \link[=fast-statistical-functions]{Fast Statistical Functions} while \code{L} and \code{F} are more practical to use in regression formulas or for computations on data frames.) } \usage{ flag(x, n = 1, \dots) L(x, n = 1, \dots) F(x, n = 1, \dots) \method{flag}{default}(x, n = 1, g = NULL, t = NULL, fill = NA, stubs = TRUE, \dots) \method{L}{default}(x, n = 1, g = NULL, t = NULL, fill = NA, stubs = TRUE, \dots) \method{F}{default}(x, n = 1, g = NULL, t = NULL, fill = NA, stubs = TRUE, \dots) \method{flag}{matrix}(x, n = 1, g = NULL, t = NULL, fill = NA, stubs = length(n) > 1L, \dots) \method{L}{matrix}(x, n = 1, g = NULL, t = NULL, fill = NA, stubs = TRUE, \dots) \method{F}{matrix}(x, n = 1, g = NULL, t = NULL, fill = NA, stubs = TRUE, \dots) \method{flag}{data.frame}(x, n = 1, g = NULL, t = NULL, fill = NA, stubs = length(n) > 1L, \dots) \method{L}{data.frame}(x, n = 1, by = NULL, t = NULL, cols = is.numeric, fill = NA, stubs = TRUE, keep.ids = TRUE, \dots) \method{F}{data.frame}(x, n = 1, by = NULL, t = NULL, cols = is.numeric, fill = NA, stubs = TRUE, keep.ids = TRUE, \dots) # Methods for compatibility with plm: \method{flag}{pseries}(x, n = 1, fill = NA, stubs = TRUE, \dots) \method{L}{pseries}(x, n = 1, fill = NA, stubs = TRUE, \dots) \method{F}{pseries}(x, n = 1, fill = NA, stubs = TRUE, \dots) \method{flag}{pdata.frame}(x, n = 1, fill = NA, stubs = length(n) > 1L, \dots) \method{L}{pdata.frame}(x, n = 1, cols = is.numeric, fill = NA, stubs = TRUE, keep.ids = TRUE, \dots) \method{F}{pdata.frame}(x, n = 1, cols = is.numeric, fill = NA, stubs = TRUE, keep.ids = TRUE, \dots) # Methods for grouped data frame / compatibility with dplyr: \method{flag}{grouped_df}(x, n = 1, t = NULL, fill = NA, stubs = length(n) > 1L, keep.ids = TRUE, \dots) \method{L}{grouped_df}(x, n = 1, t = NULL, fill = NA, stubs = TRUE, keep.ids = TRUE, \dots) \method{F}{grouped_df}(x, n = 1, t = NULL, fill = NA, stubs = TRUE, keep.ids = TRUE, \dots) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{x}{a vector / time series, (time series) matrix, data frame, panel series (\code{plm::pseries}), panel data frame (\code{plm::pdata.frame}) or grouped data frame (class 'grouped_df'). Data must not be numeric i.e you can also lag a date variable, character data etc\dots} \item{n}{integer. A vector indicating the lags / leads to compute (passing negative integers to \code{flag} or \code{L} computes leads, passing negative integers to \code{F} computes lags).} \item{g}{a factor, \code{\link{GRP}} object, atomic vector (internally converted to factor) or a list of vectors / factors (internally converted to a \code{\link{GRP}} object) used to group \code{x}.} \item{by}{\emph{data.frame method}: Same as \code{g}, but also allows one- or two-sided formulas i.e. \code{~ group1} or \code{var1 + var2 ~ group1 + group2}. See Examples.} \item{t}{same input as \code{g/by}, to indicate the time-variable(s). For safe computation of differences on unordered time series and panels. Data Frame method also allows one-sided formula i.e. \code{~time}. grouped_df method supports lazy-evaluation i.e. \code{time} (no quotes).} \item{cols}{\emph{data.frame method}: Select columns to difference using a function, column names, indices or a logical vector. Default: All numeric variables. \emph{Note}: \code{cols} is ignored if a two-sided formula is passed to \code{by}.} \item{fill}{value to insert when vectors are shifted. Default is \code{NA}. } \item{stubs}{logical. \code{TRUE} will rename all lagged / leaded columns by adding a stub or prefix "L\code{n}." / "F\code{n}.".} \item{keep.ids}{\emph{data.frame / pdata.frame / grouped_df methods}: Logical. Drop all panel-identifiers from the output (which includes all variables passed to \code{by} or \code{t}). \emph{Note}: For grouped / panel data frames identifiers are dropped, but the 'groups' / 'index' attributes are kept.} \item{\dots}{arguments to be passed to or from other methods.} } \details{ If a single integer is passed to \code{n}, and \code{g/by} and \code{t} are left empty, \code{flag/L/F} just returns \code{x} with all columns lagged / leaded by \code{n}. If \code{length(n)>1}, and \code{x} is an atomic vector (time series), \code{flag/L/F} returns a (time series) matrix with lags / leads computed in the same order as passed to \code{n}. If instead \code{x} is a matrix / data frame, a matrix / data frame with \code{ncol(x)*length(n)} columns is returned where columns are sorted first by variable and then by lag (so all lags computed on a variable are grouped together). \code{x} can be of any standard data type. With groups/panel-identifiers supplied to \code{g/by}, \code{flag/L/F} efficiently computes a panel-lag/lead by shifting the entire vector(s) but inserting \code{fill} elements in the right places. If \code{t} is left empty, the data needs to be ordered such that all values belonging to a group are consecutive and in the right order. It is not necessary that the groups themselves occur in the right order. If a time-variable is supplied to \code{t} (or a list of time-variables uniquely identifying the time-dimension), the panel is fully identified and lags / leads can be securely computed even if the data is unordered. It is also possible to lag unordered or irregular time series utilizing only the \code{t} argument to identify the temporal dimension of the data. Since v1.5.0 \code{flag/L/F} provide full built-in support for irregular time series and unbalanced panels. The suggested workaround using the \code{\link{seqid}} function is therefore no longer necessary. %\code{flag/L/F} supports balanced panels and unbalanced panels where various individuals are observed for different time-sequences (both start, end and duration of observation can differ for each individual). \code{flag/L/F} does not natively support irregularly spaced time series and panels, that is situations where there are either gaps in time and/or repeated observations in the same time-period for some individual (see also computational details below). For such cases the function \code{\link{seqid}} can be used to generate an appropriate panel-identifier (i.e. splitting individuals with an irregular time-sequence into multiple individuals with regular time-sequences before applying \code{flag/L/F}). %(in that case data is shifted around and \code{fill} values are inserted in such a way that if the data were sorted afterwards the result would be identical to computing lags / leads on sorted data). Internally this works by using the grouping- and time-variable(s) to create an ordering and then accessing the panel-vector(s) through this ordering. If the data is just a bit unordered, such computations are nearly as fast as computations on ordered data (without \code{t}), however, if the data is very unordered, it can take significantly longer. Since most panel data come perfectly or pretty ordered, I recommend always supplying \code{t} to be on the safe-side. % It is also possible to compute lags / leads on unordered time series (thus utilizing \code{t} but leaving \code{g/by} empty), although this is probably more rare to encounter than unordered panels. Irregularly spaced time series can also be lagged using a panel- identifier generated with \code{\link{seqid}}. Computationally, if both \code{g/by} and \code{t} are supplied, \code{flag/L/F} uses two initial passes to create an ordering through which the data are accessed. First-pass: Calculate minimum and maximum time-value for each individual. Second-pass: Generate the ordering by placing the current element index into the vector slot obtained by adding the cumulative group size and the current time-value subtracted its individual-minimum together. This method of computation is faster than any sort-based method and delivers optimal performance if the panel-id supplied to \code{g/by} is already a factor variable, and if \code{t} is either an integer or factor variable. If \code{t} is not factor or integer but instead \code{is.double(t) && !is.object(t)}, it is assumed to be integer represented by double and converted using \code{as.integer(t)}. For other objects such as dates, \code{t} is grouped using \code{\link{qG}} or \code{\link{GRP}} (for multiple time identifiers). Similarly, if \code{g/by} is not factor or 'GRP' object, \code{\link{qG}} or \code{\link{GRP}} will be called to group the respective identifier. Since grouping is more expensive than computing lags, prepare the data for optimal performance (or use \emph{plm} classes). See also the Note. %A caveat of not using sort-based methods is that gaps or repeated values in time are only recognized towards the end of the second pass where they cannot be rectified anymore, and thus \code{flag/L/F} does not natively support irregular panels but throws an error. The methods applying to \emph{plm} objects (panel series and panel data frames) automatically utilize the factor panel-identifiers attached to these objects and thus securely and efficiently compute fully identified panel-lags. If these objects have > 2 panel-identifiers attached to them, the last identifier is assumed to be the time-variable, and the others are taken as grouping-variables and interacted. Note that \code{flag/L/F} is significantly faster than \code{plm::lag/plm::lead} since the latter is written in R and based on a Split-Apply-Combine logic. } \value{ \code{x} lagged / leaded \code{n}-times, grouped by \code{g/by}, ordered by \code{t}. See Details and Examples. } % \references{ %% ~put references to the literature/web site here ~ % } % \author{ %% ~~who you are~~ % } \note{ Since v1.7.0, if \code{is.double(t) && !is.object(t)}, it is coerced to integer using \code{as.integer(t)}. This is to avoid the inefficiency of ordered grouping, and owes to the fact that in most data imported into R, the time (year) variables are coded as double although they should be integer. } %% ~Make other sections like Warning with \section{Warning }{\dots.} ~ \seealso{ \code{\link{fdiff}}, \code{\link{fgrowth}}, \link[=time-series-panel-series]{Time Series and Panel Series}, \link[=collapse-documentation]{Collapse Overview} } \examples{ ## Simple Time Series: AirPassengers L(AirPassengers) # 1 lag F(AirPassengers) # 1 lead all_identical(L(AirPassengers), # 3 identical ways of computing 1 lag flag(AirPassengers), F(AirPassengers, -1)) head(L(AirPassengers, -1:3)) # 1 lead and 3 lags - output as matrix ## Time Series Matrix of 4 EU Stock Market Indicators, 1991-1998 tsp(EuStockMarkets) # Data is recorded on 260 days per year freq <- frequency(EuStockMarkets) plot(stl(EuStockMarkets[,"DAX"], freq)) # There is some obvious seasonality head(L(EuStockMarkets, -1:3 * freq)) # 1 annual lead and 3 annual lags summary(lm(DAX ~., data = L(EuStockMarkets,-1:3*freq))) # DAX regressed on it's own annual lead, # lags and the lead/lags of the other series ## World Development Panel Data head(flag(wlddev, 1, wlddev$iso3c, wlddev$year)) # This lags all variables, head(L(wlddev, 1, ~iso3c, ~year)) # This lags all numeric variables head(L(wlddev, 1, ~iso3c)) # Without t: Works because data is ordered head(L(wlddev, 1, PCGDP + LIFEEX ~ iso3c, ~year)) # This lags GDP per Capita & Life Expectancy head(L(wlddev, 0:2, ~ iso3c, ~year, cols = 9:10)) # Same, also retaining original series head(L(wlddev, 1:2, PCGDP + LIFEEX ~ iso3c, ~year, # Two lags, dropping id columns keep.ids = FALSE)) # Different ways of regressing GDP on its's lags and life-Expectancy and it's lags summary(lm(PCGDP ~ ., L(wlddev, 0:2, ~iso3c, ~year, 9:10, keep.ids = FALSE))) # 1 - Precomputing summary(lm(PCGDP ~ L(PCGDP,1:2,iso3c,year) + L(LIFEEX,0:2,iso3c,year), wlddev)) # 2 - Ad-hoc summary(lm(PCGDP ~ L(PCGDP,1:2,iso3c) + L(LIFEEX,0:2,iso3c), wlddev)) # 3 - same no year g = qF(wlddev$iso3c); t = qF(wlddev$year) # 4- Precomputing summary(lm(PCGDP ~ L(PCGDP,1:2,g,t) + L(LIFEEX,0:2,g,t), wlddev)) # panel-id's \donttest{ % No code relying on suggested package ## Using plm: pwlddev <- plm::pdata.frame(wlddev, index = c("iso3c","year")) head(L(pwlddev, 0:2, 9:10)) # Again 2 lags of GDP and LIFEEX PCGDP <- pwlddev$PCGDP # A panel-Series of GDP per Capita head(L(PCGDP)) # Lagging the panel series summary(lm(PCGDP ~ ., L(pwlddev, 0:2, 9:10, keep.ids = FALSE))) # Running the lm again # THIS DOES NOT WORK: -> a pseries is only created when subsetting the pdata.frame using $ or [[ summary(lm(PCGDP ~ L(PCGDP,1:2) + L(LIFEEX,0:2), pwlddev)) # ..so L.default is used here.. LIFEEX <- pwlddev$LIFEEX # To make it work, create pseries summary(lm(PCGDP ~ L(PCGDP,1:2) + L(LIFEEX,0:2))) # THIS WORKS ! ## Using dplyr: library(dplyr) wlddev \%>\% group_by(iso3c) \%>\% select(PCGDP,LIFEEX) \%>\% L(0:2) wlddev \%>\% group_by(iso3c) \%>\% select(year,PCGDP,LIFEEX) \%>\% L(0:2,year) # Also using t (safer) } } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{manip} % __ONLY ONE__ keyword per line % use one of RShowDoc("KEYWORDS") \keyword{ts} % __ONLY ONE__ keyword per line collapse/DESCRIPTION0000644000176200001440000000760314201453312013563 0ustar liggesusersPackage: collapse Title: Advanced and Fast Data Transformation Version: 1.7.6 Date: 2022-02-11 Authors@R: c( person("Sebastian", "Krantz", role = c("aut", "cre"), email = "sebastian.krantz@graduateinstitute.ch"), person("Matt", "Dowle", role = "ctb"), person("Arun", "Srinivasan", role = "ctb"), person("Morgan", "Jacob", role = "ctb"), person("Dirk", "Eddelbuettel", role = "ctb"), person("Laurent", "Berge", role = "ctb"), person("Kevin", "Tappe", role = "ctb"), person("R Core Team and contributors worldwide", role = "ctb"), person("Martyn", "Plummer", role = "cph"), person("1999-2016 The R Core Team", role = "cph") ) Description: A C/C++ based package for advanced data transformation and statistical computing in R that is extremely fast, flexible and parsimonious to code with, class-agnostic and programmer friendly. It is well integrated with base R, 'dplyr' / (grouped) 'tibble', 'data.table', 'plm' (panel-series and data frames), 'sf' data frames, and non-destructively handles other matrix or data frame based classes (such as 'ts', 'xts' / 'zoo', 'timeSeries', 'tsibble', 'tibbletime' etc.) --- Key Features: --- (1) Advanced statistical programming: A full set of fast statistical functions supporting grouped and weighted computations on vectors, matrices and data frames. Fast and programmable grouping, ordering, unique values / rows, factor generation and interactions. Fast and flexible functions for data manipulation, data object conversions, and memory efficient R programming. (2) Advanced aggregation: Fast and easy multi-data-type, multi-function, weighted, parallelized and fully custom data aggregation. (3) Advanced transformations: Fast row / column arithmetic, (grouped) replacing and sweeping out of statistics, (grouped, weighted) scaling / standardizing, between (averaging) and (quasi-)within (demeaning) transformations, higher-dimensional centering (i.e. multiple fixed effects or polynomials), linear prediction, model fitting and testing exclusion restrictions. (4) Advanced time-computations: Fast (sequences of) lags / leads, and (lagged / leaded, iterated, quasi-, log-) differences and (compounded) growth rates on (irregular) time series and panel data. Multivariate auto-, partial- and cross-correlation functions for panel data. Panel data to (ts-)array conversions. (5) List processing: (Recursive) list search, splitting, extraction / subsetting, data-apply, and generalized recursive row-binding / unlisting in 2D. (6) Advanced data exploration: Fast (grouped, weighted, panel-decomposed) summary statistics for complex multilevel / panel data. URL: https://sebkrantz.github.io/collapse/, https://github.com/SebKrantz/collapse, https://twitter.com/collapse_R BugReports: https://github.com/SebKrantz/collapse/issues License: GPL (>= 2) | file LICENSE Encoding: UTF-8 LazyData: true Depends: R (>= 2.10) Imports: Rcpp (>= 1.0.1) LinkingTo: Rcpp Suggests: fastverse, data.table, magrittr, kit, sf, plm, fixest, vars, RcppArmadillo, RcppEigen, dplyr, ggplot2, scales, microbenchmark, testthat, covr, knitr, rmarkdown SystemRequirements: C++11 VignetteBuilder: knitr NeedsCompilation: yes Packaged: 2022-02-11 00:37:13 UTC; Sebastian Krantz Author: Sebastian Krantz [aut, cre], Matt Dowle [ctb], Arun Srinivasan [ctb], Morgan Jacob [ctb], Dirk Eddelbuettel [ctb], Laurent Berge [ctb], Kevin Tappe [ctb], R Core Team and contributors worldwide [ctb], Martyn Plummer [cph], 1999-2016 The R Core Team [cph] Maintainer: Sebastian Krantz Repository: CRAN Date/Publication: 2022-02-11 12:30:02 UTC collapse/build/0000755000176200001440000000000014201327662013156 5ustar liggesuserscollapse/build/vignette.rds0000644000176200001440000000033414201327662015515 0ustar liggesusersuO0-PQHLLtap+!n5旋Mzw}G[( FLrNJ.Tfu&:"J j 2ȐLɺʘ28?oG AeJ&h7~[AYؽ`SsRSlDW&<;H-hƉ2h]+w_Xoacollapse/tests/0000755000176200001440000000000013747523041013224 5ustar liggesuserscollapse/tests/testthat/0000755000176200001440000000000014201453312015051 5ustar liggesuserscollapse/tests/testthat/test-attribute-handling.R0000644000176200001440000005036414167345221021760 0ustar liggesuserscontext("Attribute Handling") v <- wlddev$PCGDP date <- wlddev$date fac <- wlddev$region g1 <- GRP(wlddev$country) m <- qM(mtcars) gmtc <- fgroup_by(mtcars, cyl, vs, am) gm <- qM(gmtc, TRUE) g2 <- GRP(mtcars, ~ cyl + vs + am) # gDTmtc <- fgroup_by(qDT(mtcars), cyl, vs, am) set.seed(101) f1 <- sample.int(5, length(AirPassengers), replace = TRUE) f2 <- sample.int(5, nrow(EuStockMarkets), replace = TRUE) numFUN <- setdiff(.FAST_STAT_FUN, c("fnth", "fmode", "ffirst", "flast")) countFUN <- c("fnobs", "fndistinct") test_that("statistical functions handle attributes properly", { for(i in numFUN) { # print(i) FUN <- match.fun(i) expect_true(is.null(attributes(FUN(v)))) expect_true(is.null(attributes(FUN(date)))) expect_true(is.null(attributes(FUN(fac)))) expect_true(is.null(attributes(FUN(AirPassengers)))) expect_identical(attributes(FUN(EuStockMarkets)), list(names = colnames(EuStockMarkets))) expect_identical(attributes(FUN(EuStockMarkets, drop = FALSE)), list(dim = c(1L, 4L), dimnames = list(NULL, colnames(EuStockMarkets)))) expect_identical(attributes(FUN(m)), list(names = colnames(m))) expect_identical(attributes(FUN(m, drop = FALSE)), list(dim = c(1L, 11L), dimnames = list(NULL, colnames(m)))) expect_identical(attributes(FUN(gm)), list(names = colnames(m))) expect_identical(attributes(FUN(gm, drop = FALSE)), list(dim = c(1L, 11L), dimnames = list(NULL, colnames(m)), groups = attr(gm, "groups"))) expect_identical(attributes(FUN(mtcars)), list(names = names(mtcars))) expect_identical(attributes(FUN(mtcars, drop = FALSE)), `[[<-`(attributes(mtcars), "row.names", 1L)) expect_identical(attributes(FUN(`oldClass<-`(gmtc, "data.frame"))), list(names = names(mtcars))) expect_identical(attributes(FUN(`oldClass<-`(gmtc, "data.frame"), drop = FALSE)), `[[<-`(`[[<-`(attributes(gmtc), "row.names", 1L), "class", "data.frame")) # Grouped expect_identical(attributes(FUN(v, g1, use.g.names = FALSE)), attributes(v)) expect_identical(attributes(FUN(v, g1)), c(attributes(v), list(names = unattrib(GRPnames(g1))))) expect_identical(attributes(FUN(date, g1, use.g.names = FALSE)), if(i %!in% countFUN) NULL else list(label = vlabels(date))) expect_identical(attributes(FUN(date, g1)), if(i %!in% countFUN) list(names = unattrib(GRPnames(g1))) else list(label = vlabels(date), names = unattrib(GRPnames(g1)))) expect_identical(attributes(FUN(fac, g1, use.g.names = FALSE)), if(i %!in% countFUN) NULL else list(label = vlabels(fac))) expect_identical(attributes(FUN(fac, g1)), if(i %!in% countFUN) list(names = unattrib(GRPnames(g1))) else list(label = vlabels(fac), names = unattrib(GRPnames(g1)))) expect_identical(attributes(FUN(AirPassengers, f1, use.g.names = FALSE)), NULL) expect_identical(attributes(FUN(AirPassengers, f1)), list(names = as.character(1:5))) expect_identical(attributes(FUN(EuStockMarkets, f2, use.g.names = FALSE)), list(dim = c(5L, 4L), dimnames = dimnames(EuStockMarkets))) expect_identical(attributes(FUN(EuStockMarkets, f2)), list(dim = c(5L, 4L), dimnames = list(as.character(1:5), colnames(EuStockMarkets)))) expect_identical(attributes(FUN(m, g2, use.g.names = FALSE)), list(dim = c(7L, 11L), dimnames = list(NULL, colnames(m)))) expect_identical(attributes(FUN(m, g2)), list(dim = c(7L, 11L), dimnames = list(GRPnames(g2), colnames(m)))) expect_identical(attributes(FUN(gm, attr(gm, "groups"), use.g.names = FALSE)), list(dim = c(7L, 11L), dimnames = list(NULL, colnames(m)), groups = attr(gm, "groups"))) expect_identical(attributes(FUN(gm, attr(gm, "groups"))), list(dim = c(7L, 11L), dimnames = list(GRPnames(g2), colnames(m)), groups = attr(gm, "groups"))) if(Sys.getenv("NCRAN") == "TRUE") { expect_identical(attributes(FUN(mtcars, g2, use.g.names = FALSE)), `[[<-`(attributes(mtcars), "row.names", value = seq_len(g2[[1L]]))) expect_identical(attributes(FUN(mtcars, g2)), `[[<-`(attributes(mtcars), "row.names", value = GRPnames(g2))) } expect_identical(attributes(FUN(`oldClass<-`(gmtc, "data.frame"), g2, use.g.names = FALSE)), `[[<-`(`[[<-`(attributes(gmtc), "row.names", value = seq_len(g2[[1L]])), "class", "data.frame")) expect_identical(attributes(FUN(`oldClass<-`(gmtc, "data.frame"), g2)), `[[<-`(`[[<-`(attributes(gmtc), "row.names", GRPnames(g2)), "class", "data.frame")) expect_identical(attributes(FUN(gmtc)), list(names = c(fgroup_vars(gmtc, "names"), names(mtcars)[-c(2,8:9)]), row.names = seq_len(g2[[1L]]), class = "data.frame")) expect_identical(attributes(FUN(gmtc, use.g.names = TRUE)), list(names = c(fgroup_vars(gmtc, "names"), names(mtcars)[-c(2,8:9)]), row.names = GRPnames(g2), class = "data.frame")) expect_identical(attributes(FUN(gmtc, keep.group_vars = FALSE)), list(names = names(mtcars)[-c(2,8:9)], row.names = seq_len(g2[[1L]]), class = "data.frame")) expect_identical(attributes(FUN(gmtc, keep.group_vars = FALSE, use.g.names = TRUE)), list(names = names(mtcars)[-c(2,8:9)], row.names = GRPnames(g2), class = "data.frame")) } for(i in c("fmode", "ffirst", "flast")) { # print(i) FUN <- match.fun(i) for(k in names(wlddev)) expect_identical(attributes(FUN(wlddev[[k]])), attributes(wlddev[[k]])) expect_identical(attributes(FUN(AirPassengers)), attributes(AirPassengers)) # This is problematic!! expect_identical(attributes(FUN(EuStockMarkets)), list(names = colnames(EuStockMarkets))) expect_identical(attributes(FUN(EuStockMarkets, drop = FALSE)), list(dim = c(1L, 4L), dimnames = list(NULL, colnames(EuStockMarkets)))) expect_identical(attributes(FUN(m)), list(names = colnames(m))) expect_identical(attributes(FUN(m, drop = FALSE)), list(dim = c(1L, 11L), dimnames = list(NULL, colnames(m)))) expect_identical(attributes(FUN(gm)), list(names = colnames(m))) expect_identical(attributes(FUN(gm, drop = FALSE)), list(dim = c(1L, 11L), dimnames = list(NULL, colnames(m)), groups = attr(gm, "groups"))) expect_identical(attributes(FUN(wlddev)), list(names = names(wlddev))) expect_identical(attributes(FUN(wlddev, drop = FALSE)), `[[<-`(attributes(wlddev), "row.names", 1L)) expect_identical(attributes(FUN(`oldClass<-`(gmtc, "data.frame"))), list(names = names(mtcars))) expect_identical(attributes(FUN(`oldClass<-`(gmtc, "data.frame"), drop = FALSE)), `[[<-`(`[[<-`(attributes(gmtc), "row.names", 1L), "class", "data.frame")) # Grouped for(k in names(wlddev)) expect_identical(attributes(FUN(wlddev[[k]], g1, use.g.names = FALSE)), attributes(wlddev[[k]])) for(k in names(wlddev)) expect_identical(attributes(FUN(wlddev[[k]], g1)), c(attributes(wlddev[[k]]), list(names = unattrib(GRPnames(g1))))) expect_identical(attributes(FUN(AirPassengers, f1, use.g.names = FALSE)), attributes(AirPassengers)) # This is problematic!! expect_identical(attributes(FUN(AirPassengers, f1)), c(attributes(AirPassengers), list(names = as.character(1:5)))) # This is problematic!! expect_identical(attributes(FUN(EuStockMarkets, f2, use.g.names = FALSE)), list(dim = c(5L, 4L), dimnames = dimnames(EuStockMarkets))) expect_identical(attributes(FUN(EuStockMarkets, f2)), list(dim = c(5L, 4L), dimnames = list(as.character(1:5), colnames(EuStockMarkets)))) expect_identical(attributes(FUN(m, g2, use.g.names = FALSE)), list(dim = c(7L, 11L), dimnames = list(NULL, colnames(m)))) expect_identical(attributes(FUN(m, g2)), list(dim = c(7L, 11L), dimnames = list(GRPnames(g2), colnames(m)))) expect_identical(attributes(FUN(gm, attr(gm, "groups"), use.g.names = FALSE)), list(dim = c(7L, 11L), dimnames = list(NULL, colnames(m)), groups = attr(gm, "groups"))) expect_identical(attributes(FUN(gm, attr(gm, "groups"))), list(dim = c(7L, 11L), dimnames = list(GRPnames(g2), colnames(m)), groups = attr(gm, "groups"))) expect_identical(attributes(FUN(wlddev, g1, use.g.names = FALSE)), `[[<-`(attributes(wlddev), "row.names", value = seq_len(g1[[1L]]))) expect_identical(attributes(FUN(wlddev, g1)), `[[<-`(attributes(wlddev), "row.names", value = GRPnames(g1))) if(Sys.getenv("NCRAN") == "TRUE") { expect_identical(attributes(FUN(`oldClass<-`(gmtc, "data.frame"), g2, use.g.names = FALSE)), `[[<-`(`[[<-`(attributes(gmtc), "row.names", value = seq_len(g2[[1L]])), "class", "data.frame")) expect_identical(attributes(FUN(`oldClass<-`(gmtc, "data.frame"), g2)), `[[<-`(`[[<-`(attributes(gmtc), "row.names", GRPnames(g2)), "class", "data.frame")) } expect_identical(attributes(FUN(gmtc)), list(names = c(fgroup_vars(gmtc, "names"), names(mtcars)[-c(2,8:9)]), row.names = seq_len(g2[[1L]]), class = "data.frame")) expect_identical(attributes(FUN(gmtc, use.g.names = TRUE)), list(names = c(fgroup_vars(gmtc, "names"), names(mtcars)[-c(2,8:9)]), row.names = GRPnames(g2), class = "data.frame")) expect_identical(attributes(FUN(gmtc, keep.group_vars = FALSE)), list(names = names(mtcars)[-c(2,8:9)], row.names = seq_len(g2[[1L]]), class = "data.frame")) expect_identical(attributes(FUN(gmtc, keep.group_vars = FALSE, use.g.names = TRUE)), list(names = names(mtcars)[-c(2,8:9)], row.names = GRPnames(g2), class = "data.frame")) } }) transFUN <- setdiff(c(.FAST_FUN, .OPERATOR_FUN), c(.FAST_STAT_FUN, "fhdbetween", "fhdwithin", "HDB", "HDW")) options(collapse_unused_arg_action = "none", warn = -1) test_that("transformation functions preserve all attributes", { for(i in transFUN) { # print(i) FUN <- match.fun(i) for(k in if(i %in% c("flag","L","F")) names(wlddev) else num_vars(wlddev, "names")) expect_identical(attributes(FUN(wlddev[[k]])), attributes(wlddev[[k]])) expect_identical(attributes(FUN(AirPassengers)), attributes(AirPassengers)) expect_identical(attributes(FUN(EuStockMarkets, stubs = FALSE, stub = FALSE)), attributes(EuStockMarkets)) expect_identical(attributes(FUN(m, stubs = FALSE, stub = FALSE)), attributes(m)) expect_identical(attributes(FUN(gm, stubs = FALSE, stub = FALSE)), attributes(gm)) expect_identical(attributes(FUN(if(i == "flag") wlddev else num_vars(wlddev), stubs = FALSE, stub = FALSE)), attributes(if(i == "flag") wlddev else num_vars(wlddev))) expect_identical(attributes(FUN(`oldClass<-`(gmtc, "data.frame"), stubs = FALSE, stub = FALSE)), attributes(`oldClass<-`(gmtc, "data.frame"))) # Grouped for(k in if(i == "flag") names(wlddev) else num_vars(wlddev, "names")) expect_identical(attributes(FUN(wlddev[[k]], g = g1)), attributes(wlddev[[k]])) expect_identical(attributes(FUN(AirPassengers, g = f1)), attributes(AirPassengers)) expect_identical(attributes(FUN(EuStockMarkets, g = f2, stubs = FALSE, stub = FALSE)), attributes(EuStockMarkets)) expect_identical(attributes(FUN(m, g = g2, stubs = FALSE, stub = FALSE)), attributes(m)) expect_identical(attributes(FUN(gm, g = attr(gm, "groups"), stubs = FALSE, stub = FALSE)), attributes(gm)) expect_identical(attributes(FUN(if(i == "flag") wlddev else num_vars(wlddev), g = g1, by = g1, stubs = FALSE, stub = FALSE)), attributes(if(i == "flag") wlddev else num_vars(wlddev))) expect_identical(attributes(FUN(`oldClass<-`(gmtc, "data.frame"), g = g2, by = g2, stubs = FALSE, stub = FALSE)), `[[<-`(attributes(gmtc), "class", "data.frame")) expect_identical(attributes(if(i %in% c("B","W", "STD")) FUN(gmtc, stub = FALSE) else FUN(gmtc, stubs = FALSE)), `[[<-`(attributes(gmtc), "names", c(fgroup_vars(gmtc, "names"), names(mtcars)[-c(2L ,8:9)]))) if(i %in% c("fcumsum", "flag", "L", "F", "fdiff", "D", "Dlog", "fgrowth", "G")) expect_identical(attributes(FUN(gmtc, keep.ids = FALSE, stubs = FALSE)), `[[<-`(attributes(gmtc), "names", names(mtcars)[-c(2L ,8:9)])) else expect_identical(attributes(FUN(gmtc, keep.group_vars = FALSE, stub = FALSE)), `[[<-`(attributes(gmtc), "names", names(mtcars)[-c(2L ,8:9)])) } }) options(collapse_unused_arg_action = "warning", warn = 1) test_that("TRA attribute preservation works well", { # Default Vector Method expect_equal(attributes(TRA(AirPassengers, 1, "replace")), attributes(AirPassengers)) # Double expect_equal(attributes(TRA(AirPassengers, 1L, "replace"))[[1]], tsp(AirPassengers)) # Integer -> Change of type !! expect_equal(attributes(TRA(AirPassengers, 1, "replace_fill")), attributes(AirPassengers)) # Double expect_equal(attributes(TRA(AirPassengers, 1L, "replace_fill"))[[1]], tsp(AirPassengers)) # Integer -> Change of type !! expect_equal(attributes(TRA(AirPassengers, 1, "-")), attributes(AirPassengers)) # Double expect_equal(attributes(TRA(AirPassengers, 1L, "-")), attributes(AirPassengers)) # Integer -> Coerced to double in numeric operation set.seed(101) f <- qF(sample.int(5L, length(AirPassengers), TRUE), na.exclude = FALSE) num <- unclass(fmean(AirPassengers, f)); int <- fnobs(AirPassengers, f) expect_equal(attributes(TRA(AirPassengers, num, "replace", f)), attributes(AirPassengers)) # Double expect_equal(attributes(TRA(AirPassengers, int, "replace", f))[[1]], tsp(AirPassengers)) # Integer -> Change of type !! expect_equal(attributes(TRA(AirPassengers, num, "replace_fill", f)), attributes(AirPassengers)) # Double expect_equal(attributes(TRA(AirPassengers, int, "replace_fill", f))[[1]], tsp(AirPassengers)) # Integer -> Change of type !! expect_equal(attributes(TRA(AirPassengers, num, "-", f)), attributes(AirPassengers)) # Double expect_equal(attributes(TRA(AirPassengers, int, "-", f)), attributes(AirPassengers)) # Integer -> Coerced to double in numeric operation # Matrix Method expect_equal(attributes(TRA(EuStockMarkets, rep(1, 4L), "replace")), attributes(EuStockMarkets)) # Double expect_equal(attributes(TRA(EuStockMarkets, rep(1L, 4L), "replace"))[["tsp"]], tsp(EuStockMarkets)) # Integer -> Change of type !! expect_equal(attributes(TRA(EuStockMarkets, rep(1, 4L), "replace_fill")), attributes(EuStockMarkets)) # Double expect_equal(attributes(TRA(EuStockMarkets, rep(1L, 4L), "replace_fill"))[["tsp"]], tsp(EuStockMarkets)) # Integer -> Change of type !! expect_equal(attributes(TRA(EuStockMarkets, rep(1, 4L), "-")), attributes(EuStockMarkets)) # Double expect_equal(attributes(TRA(EuStockMarkets, rep(1L, 4L), "-")), attributes(EuStockMarkets)) # Integer -> Coerced to double in numeric operation set.seed(101) f <- qF(sample.int(5L, nrow(EuStockMarkets), TRUE), na.exclude = FALSE) num <- unclass(fmean(EuStockMarkets, f)); int <- fnobs(EuStockMarkets, f) expect_equal(attributes(TRA(EuStockMarkets, num, "replace", f)), attributes(EuStockMarkets)) # Double expect_equal(attributes(TRA(EuStockMarkets, int, "replace", f))[["tsp"]], tsp(EuStockMarkets)) # Integer -> Change of type !! expect_equal(attributes(TRA(EuStockMarkets, num, "replace_fill", f)), attributes(EuStockMarkets)) # Double expect_equal(attributes(TRA(EuStockMarkets, int, "replace_fill", f))[["tsp"]], tsp(EuStockMarkets)) # Integer -> Change of type !! expect_equal(attributes(TRA(EuStockMarkets, num, "-", f)), attributes(EuStockMarkets)) # Double expect_equal(attributes(TRA(EuStockMarkets, int, "-", f)), attributes(EuStockMarkets)) # Integer -> Coerced to double in numeric operation # Data Frame Method # CATEGORICAL # Simple expect_equal(vclasses(unattrib(fndistinct(wlddev, TRA = "replace_fill"))), rep("integer", 13L)) expect_equal(vclasses(unattrib(fndistinct(wlddev, TRA = "replace"))), rep("integer", 13L)) expect_equal(vclasses(unattrib(fnobs(wlddev, TRA = "replace_fill"))), rep("integer", 13L)) expect_equal(vclasses(unattrib(fnobs(wlddev, TRA = "replace"))), rep("integer", 13L)) expect_equal(lapply(ffirst(wlddev, TRA = "replace_fill"), attributes), lapply(wlddev, attributes)) expect_equal(lapply(ffirst(wlddev, TRA = "replace"), attributes), lapply(wlddev, attributes)) expect_equal(lapply(flast(wlddev, TRA = "replace_fill"), attributes), lapply(wlddev, attributes)) expect_equal(lapply(flast(wlddev, TRA = "replace"), attributes), lapply(wlddev, attributes)) expect_equal(lapply(fmode(wlddev, TRA = "replace_fill"), attributes), lapply(wlddev, attributes)) expect_equal(lapply(fmode(wlddev, TRA = "replace"), attributes), lapply(wlddev, attributes)) # Grouped expect_equal(vclasses(unattrib(fndistinct(wlddev, wlddev$iso3c, TRA = "replace_fill"))), rep("integer", 13L)) expect_equal(vclasses(unattrib(fndistinct(wlddev, wlddev$iso3c, TRA = "replace"))), rep("integer", 13L)) expect_equal(vclasses(unattrib(fnobs(wlddev, wlddev$iso3c, TRA = "replace_fill"))), rep("integer", 13L)) expect_equal(vclasses(unattrib(fnobs(wlddev, wlddev$iso3c, TRA = "replace"))), rep("integer", 13L)) expect_equal(lapply(ffirst(wlddev, wlddev$iso3c, TRA = "replace_fill"), attributes), lapply(wlddev, attributes)) expect_equal(lapply(ffirst(wlddev, wlddev$iso3c, TRA = "replace"), attributes), lapply(wlddev, attributes)) expect_equal(lapply(flast(wlddev, wlddev$iso3c, TRA = "replace_fill"), attributes), lapply(wlddev, attributes)) expect_equal(lapply(flast(wlddev, wlddev$iso3c, TRA = "replace"), attributes), lapply(wlddev, attributes)) expect_equal(lapply(fmode(wlddev, wlddev$iso3c, TRA = "replace_fill"), attributes), lapply(wlddev, attributes)) expect_equal(lapply(fmode(wlddev, wlddev$iso3c, TRA = "replace"), attributes), lapply(wlddev, attributes)) # Numeric nwld <- num_vars(wlddev) # Simple expect_equal(vclasses(unattrib(fndistinct(nwld, TRA = "replace_fill"))), rep("integer", fncol(nwld))) expect_equal(vclasses(unattrib(fndistinct(nwld, TRA = "replace"))), rep("integer", fncol(nwld))) expect_equal(vclasses(unattrib(fndistinct(nwld, TRA = "-"))), rep("numeric", fncol(nwld))) expect_equal(vclasses(unattrib(fnobs(nwld, TRA = "replace_fill"))), rep("integer", fncol(nwld))) expect_equal(vclasses(unattrib(fnobs(nwld, TRA = "replace"))), rep("integer", fncol(nwld))) expect_equal(vclasses(unattrib(fnobs(nwld, TRA = "%%"))), rep("numeric", fncol(nwld))) expect_equal(lapply(fmean(nwld, TRA = "replace_fill"), attributes), lapply(nwld, attributes)) expect_equal(lapply(fmean(nwld, TRA = "replace"), attributes), lapply(nwld, attributes)) expect_equal(lapply(fmean(nwld, TRA = "+"), attributes), lapply(nwld, attributes)) expect_equal(lapply(fsd(nwld, TRA = "replace_fill"), attributes), lapply(nwld, attributes)) expect_equal(lapply(fsd(nwld, TRA = "replace"), attributes), lapply(nwld, attributes)) expect_equal(lapply(fsd(nwld, TRA = "/"), attributes), lapply(nwld, attributes)) expect_equal(lapply(fmedian(nwld, TRA = "replace_fill"), attributes), lapply(nwld, attributes)) expect_equal(lapply(fmedian(nwld, TRA = "replace"), attributes), lapply(nwld, attributes)) expect_equal(lapply(fmedian(nwld, TRA = "-"), attributes), lapply(nwld, attributes)) # Grouped expect_equal(vclasses(unattrib(fndistinct(nwld, wlddev$iso3c, TRA = "replace_fill"))), rep("integer", fncol(nwld))) expect_equal(vclasses(unattrib(fndistinct(nwld, wlddev$iso3c, TRA = "replace"))), rep("integer", fncol(nwld))) expect_equal(vclasses(unattrib(fndistinct(nwld, wlddev$iso3c, TRA = "-%%"))), rep("numeric", fncol(nwld))) expect_equal(vclasses(unattrib(fnobs(nwld, wlddev$iso3c, TRA = "replace_fill"))), rep("integer", fncol(nwld))) expect_equal(vclasses(unattrib(fnobs(nwld, wlddev$iso3c, TRA = "replace"))), rep("integer", fncol(nwld))) expect_equal(vclasses(unattrib(fnobs(nwld, wlddev$iso3c, TRA = "*"))), rep("numeric", fncol(nwld))) expect_equal(lapply(fmean(nwld, wlddev$iso3c, TRA = "replace_fill"), attributes), lapply(nwld, attributes)) expect_equal(lapply(fmean(nwld, wlddev$iso3c, TRA = "replace"), attributes), lapply(nwld, attributes)) expect_equal(lapply(fmean(nwld, wlddev$iso3c, TRA = "-+"), attributes), lapply(nwld, attributes)) expect_equal(lapply(fsd(nwld, wlddev$iso3c, TRA = "replace_fill"), attributes), lapply(nwld, attributes)) expect_equal(lapply(fsd(nwld, wlddev$iso3c, TRA = "replace"), attributes), lapply(nwld, attributes)) expect_equal(lapply(fsd(nwld, wlddev$iso3c, TRA = "/"), attributes), lapply(nwld, attributes)) expect_equal(lapply(fmedian(nwld, wlddev$iso3c, TRA = "replace_fill"), attributes), lapply(nwld, attributes)) expect_equal(lapply(fmedian(nwld, wlddev$iso3c, TRA = "replace"), attributes), lapply(nwld, attributes)) expect_equal(lapply(fmedian(nwld, wlddev$iso3c, TRA = "+"), attributes), lapply(nwld, attributes)) }) collapse/tests/testthat/test-psmat-psacf.R0000644000176200001440000002605514135105754020411 0ustar liggesuserscontext("psmat and psacf") # rm(list = ls()) options(warn = -1) test_that("psmat works as intended", { expect_identical(psmat(wlddev$PCGDP, wlddev$iso3c, wlddev$year), psmat(wlddev, PCGDP ~ iso3c, ~ year)) expect_identical(psmat(wlddev$PCGDP, wlddev$iso3c, wlddev$year), psmat(wlddev[9], wlddev$iso3c, wlddev$year)) expect_identical(psmat(wlddev[9:12], wlddev$iso3c, wlddev$year), psmat(wlddev, PCGDP + LIFEEX + GINI + ODA ~ iso3c, ~ year)) expect_identical(psmat(wlddev[9:12], wlddev$iso3c, wlddev$year), psmat(wlddev, ~ iso3c, ~ year, cols = 9:12)) # without year expect_identical(psmat(wlddev$PCGDP, wlddev$iso3c), psmat(wlddev, PCGDP ~ iso3c)) expect_identical(psmat(wlddev$PCGDP, wlddev$iso3c), psmat(wlddev[9], wlddev$iso3c)) expect_identical(psmat(wlddev[9:12], wlddev$iso3c), psmat(wlddev, PCGDP + LIFEEX + GINI + ODA ~ iso3c)) expect_identical(psmat(wlddev[9:12], wlddev$iso3c), psmat(wlddev, ~ iso3c, cols = 9:12)) # only nid's expect_identical(psmat(wlddev$PCGDP, 216), psmat(wlddev[9], 216)) expect_identical(psmat(wlddev[9:12], 216), psmat(wlddev, 216, cols = 9:12)) # TRANSPOSE expect_identical(psmat(wlddev$PCGDP, wlddev$iso3c, wlddev$year, transpose = TRUE), `attr<-`(t(psmat(wlddev$PCGDP, wlddev$iso3c, wlddev$year)), "transpose", TRUE)) expect_identical(psmat(wlddev$PCGDP, wlddev$iso3c, wlddev$year, transpose = TRUE), psmat(wlddev, PCGDP ~ iso3c, ~ year, transpose = TRUE)) expect_identical(psmat(wlddev$PCGDP, wlddev$iso3c, wlddev$year, transpose = TRUE), psmat(wlddev[9], wlddev$iso3c, wlddev$year, transpose = TRUE)) expect_identical(psmat(wlddev[9:12], wlddev$iso3c, wlddev$year, transpose = TRUE), psmat(wlddev, PCGDP + LIFEEX + GINI + ODA ~ iso3c, ~ year, transpose = TRUE)) expect_identical(psmat(wlddev[9:12], wlddev$iso3c, wlddev$year, transpose = TRUE), psmat(wlddev, ~ iso3c, ~ year, cols = 9:12, transpose = TRUE)) # without year expect_identical(psmat(wlddev$PCGDP, wlddev$iso3c, transpose = TRUE), psmat(wlddev, PCGDP ~ iso3c, transpose = TRUE)) expect_identical(psmat(wlddev$PCGDP, wlddev$iso3c, transpose = TRUE), psmat(wlddev[9], wlddev$iso3c, transpose = TRUE)) expect_identical(psmat(wlddev[9:12], wlddev$iso3c, transpose = TRUE), psmat(wlddev, PCGDP + LIFEEX + GINI + ODA ~ iso3c, transpose = TRUE)) expect_identical(psmat(wlddev[9:12], wlddev$iso3c, transpose = TRUE), psmat(wlddev, ~ iso3c, cols = 9:12, transpose = TRUE)) # only nid's expect_identical(psmat(wlddev$PCGDP, 216, transpose = TRUE), psmat(wlddev[9], 216, transpose = TRUE)) expect_identical(psmat(wlddev[9:12], 216, transpose = TRUE), psmat(wlddev, 216, cols = 9:12, transpose = TRUE)) # LIST-OUTPUT expect_true(is.array(psmat(wlddev[9:12], wlddev$iso3c, wlddev$year))) expect_true(is.list(psmat(wlddev[9:12], wlddev$iso3c, wlddev$year, array = FALSE))) expect_identical(psmat(wlddev[9:12], wlddev$iso3c, wlddev$year, array = FALSE), psmat(wlddev, PCGDP + LIFEEX + GINI + ODA ~ iso3c, ~ year, array = FALSE)) expect_identical(psmat(wlddev[9:12], wlddev$iso3c, wlddev$year, array = FALSE), psmat(wlddev, ~ iso3c, ~ year, cols = 9:12, array = FALSE)) # without year expect_identical(psmat(wlddev[9:12], wlddev$iso3c, array = FALSE), psmat(wlddev, PCGDP + LIFEEX + GINI + ODA ~ iso3c, array = FALSE)) expect_identical(psmat(wlddev[9:12], wlddev$iso3c, array = FALSE), psmat(wlddev, ~ iso3c, cols = 9:12, array = FALSE)) # only nid's expect_identical(psmat(wlddev[9:12], 216, array = FALSE), psmat(wlddev, 216, cols = 9:12, array = FALSE)) }) test_that("psacf works as intended", { x <- na_rm(wlddev$PCGDP) expect_equal(unclass(psacf(x, rep(1,length(x)), seq_along(x), lag.max = 12, plot = FALSE))[1:4], unclass(acf(x, lag.max = 12, plot = FALSE))[1:4], tolerance = 1e-3) expect_equal(unclass(psacf(x, rep(1,length(x)), seq_along(x), type = "covariance", lag.max = 12, gscale = FALSE, plot = FALSE))[1:4], unclass(acf(x, type = "covariance", lag.max = 12, plot = FALSE))[1:4], tolerance = 1e-3) expect_equal(unclass(pspacf(x, rep(1,length(x)), seq_along(x), lag.max = 12, plot = FALSE))[1:4], unclass(pacf(x, lag.max = 12, plot = FALSE))[1:4], tolerance = 1e-3) dat <- na_omit(get_vars(wlddev, c(9:10,12))) expect_equal(unclass(psacf(dat, rep(1,nrow(dat)), seq_row(dat), lag.max = 12, plot = FALSE))[1:4], unclass(acf(dat, lag.max = 12, plot = FALSE))[1:4], tolerance = 1e-2) expect_equal(unclass(psacf(dat, rep(1,nrow(dat)), seq_row(dat), type = "covariance", lag.max = 12, gscale = FALSE, plot = FALSE))[1:4], unclass(acf(dat, type = "covariance", lag.max = 12, plot = FALSE))[1:4], tolerance = 1e-2) # expect_equal(unclass(pspacf(dat, rep(1,nrow(dat)), seq_row(dat), lag.max = 12, plot = FALSE))[1:4], unclass(pacf(dat, lag.max = 12, plot = FALSE))[1:4], tolerance = 1e-2) # This is strange !!!! expect_equal(unclass(psacf(wlddev$PCGDP, wlddev$iso3c, wlddev$year, plot = FALSE))[1:4], unclass(psacf(wlddev, PCGDP ~ iso3c, ~ year, plot = FALSE))[1:4]) expect_equal(unclass(psacf(wlddev$PCGDP, wlddev$iso3c, wlddev$year, plot = FALSE))[1:4], unclass(psacf(wlddev[9], wlddev$iso3c, wlddev$year, plot = FALSE))[1:4]) expect_equal(unclass(psacf(wlddev[9:12], wlddev$iso3c, wlddev$year, plot = FALSE))[1:4], unclass(psacf(wlddev, PCGDP + LIFEEX + GINI + ODA ~ iso3c, ~ year, plot = FALSE))[1:4]) expect_equal(unclass(psacf(wlddev[9:12], wlddev$iso3c, wlddev$year, plot = FALSE))[1:4], unclass(psacf(wlddev, ~ iso3c, ~ year, cols = 9:12, plot = FALSE))[1:4]) # equality expect_equal(unclass(psacf(wlddev$PCGDP, wlddev$iso3c, plot = FALSE))[1:4], unclass(psacf(wlddev$PCGDP, wlddev$iso3c, wlddev$year, plot = FALSE))[1:4]) # without year expect_equal(unclass(psacf(wlddev$PCGDP, wlddev$iso3c, plot = FALSE))[1:4], unclass(psacf(wlddev, PCGDP ~ iso3c, plot = FALSE))[1:4]) expect_equal(unclass(psacf(wlddev$PCGDP, wlddev$iso3c, plot = FALSE))[1:4], unclass(psacf(wlddev[9], wlddev$iso3c, plot = FALSE))[1:4]) expect_equal(unclass(psacf(wlddev[9:12], wlddev$iso3c, plot = FALSE))[1:4], unclass(psacf(wlddev, PCGDP + LIFEEX + GINI + ODA ~ iso3c, plot = FALSE))[1:4]) expect_equal(unclass(psacf(wlddev[9:12], wlddev$iso3c, plot = FALSE))[1:4], unclass(psacf(wlddev, ~ iso3c, cols = 9:12, plot = FALSE))[1:4]) }) test_that("pspacf works as intended", { expect_equal(unclass(pspacf(wlddev$PCGDP, wlddev$iso3c, wlddev$year, plot = FALSE))[1:4], unclass(pspacf(wlddev, PCGDP ~ iso3c, ~ year, plot = FALSE))[1:4]) expect_equal(unclass(pspacf(wlddev$PCGDP, wlddev$iso3c, wlddev$year, plot = FALSE))[1:4], unclass(pspacf(wlddev[9], wlddev$iso3c, wlddev$year, plot = FALSE))[1:4]) expect_equal(unclass(pspacf(wlddev[9:12], wlddev$iso3c, wlddev$year, plot = FALSE))[1:4], unclass(pspacf(wlddev, PCGDP + LIFEEX + GINI + ODA ~ iso3c, ~ year, plot = FALSE))[1:4]) expect_equal(unclass(pspacf(wlddev[9:12], wlddev$iso3c, wlddev$year, plot = FALSE))[1:4], unclass(pspacf(wlddev, ~ iso3c, ~ year, cols = 9:12, plot = FALSE))[1:4]) # equality expect_equal(unclass(pspacf(wlddev$PCGDP, wlddev$iso3c, plot = FALSE))[1:4], unclass(pspacf(wlddev$PCGDP, wlddev$iso3c, wlddev$year, plot = FALSE))[1:4]) # without year expect_equal(unclass(pspacf(wlddev$PCGDP, wlddev$iso3c, plot = FALSE))[1:4], unclass(pspacf(wlddev, PCGDP ~ iso3c, plot = FALSE))[1:4]) expect_equal(unclass(pspacf(wlddev$PCGDP, wlddev$iso3c, plot = FALSE))[1:4], unclass(pspacf(wlddev[9], wlddev$iso3c, plot = FALSE))[1:4]) expect_equal(unclass(pspacf(wlddev[9:12], wlddev$iso3c, plot = FALSE))[1:4], unclass(pspacf(wlddev, PCGDP + LIFEEX + GINI + ODA ~ iso3c, plot = FALSE))[1:4]) expect_equal(unclass(pspacf(wlddev[9:12], wlddev$iso3c, plot = FALSE))[1:4], unclass(pspacf(wlddev, ~ iso3c, cols = 9:12, plot = FALSE))[1:4]) }) test_that("psmat gives errors for wrong input", { # wrong lengths expect_error(psmat(wlddev$PCGDP, wlddev$iso3c[-1], wlddev$year)) expect_error(psmat(wlddev$PCGDP, wlddev$iso3c, wlddev$year[-1])) expect_error(psmat(wlddev[9:12], wlddev$iso3c[-1], wlddev$year)) expect_error(psmat(wlddev[9:12], wlddev$iso3c, wlddev$year[-1])) # without year expect_error(psmat(wlddev$PCGDP, wlddev$iso3c[-1])) expect_error(psmat(wlddev[9:12], wlddev$iso3c[-1])) # only nid's expect_error(psmat(wlddev$PCGDP, 218)) expect_error(psmat(wlddev[9:12], 218)) # wrong formula expect_error(psmat(wlddev, PCGDP2 ~ iso3c, ~ year)) expect_error(psmat(wlddev, PCGDP ~ iso3c2, ~ year)) expect_error(psmat(wlddev, PCGDP ~ iso3c, ~ year2)) expect_error(psmat(wlddev, PCGDP + LIFEEX + GINI + ODA + bla ~ iso3c, ~ year)) expect_error(psmat(wlddev, PCGDP + LIFEEX + GINI + ODA ~ iso3c + bla, ~ year)) expect_error(psmat(wlddev, PCGDP + LIFEEX + GINI + ODA ~ iso3c, ~ year + bla)) # without year expect_error(psmat(wlddev, PCGDP2 ~ iso3c)) expect_error(psmat(wlddev, PCGDP ~ iso3c2)) expect_error(psmat(wlddev, PCGDP + LIFEEX + GINI + ODA + bla ~ iso3c, ~ year)) expect_error(psmat(wlddev, PCGDP + LIFEEX + GINI + ODA ~ iso3c + bla, ~ year)) # cols expect_error(psmat(wlddev, ~ iso3c, ~ year, cols = 14)) expect_error(psmat(wlddev, ~ iso3c, ~ year, cols = "bla")) expect_visible(psmat(wlddev, ~ iso3c, ~ year, cols = sapply(wlddev, is.numeric))) expect_error(psmat(wlddev, ~ iso3c, ~ year, cols = sapply(wlddev, is.numeric)[-1])) }) test_that("psacf gives errors for wrong input", { # wrong lengths expect_error(psacf(wlddev$PCGDP, wlddev$iso3c[-1], wlddev$year, plot = FALSE)) expect_error(psacf(wlddev$PCGDP, wlddev$iso3c, wlddev$year[-1], plot = FALSE)) expect_error(psacf(wlddev[9:12], wlddev$iso3c[-1], wlddev$year, plot = FALSE)) expect_error(psacf(wlddev[9:12], wlddev$iso3c, wlddev$year[-1], plot = FALSE)) # without year expect_error(psacf(wlddev$PCGDP, wlddev$iso3c[-1], plot = FALSE)) expect_error(psacf(wlddev[9:12], wlddev$iso3c[-1], plot = FALSE)) # this should give error... expect_error(psacf(wlddev$PCGDP, 218, plot = FALSE)) expect_error(psacf(wlddev[9:12], 218, plot = FALSE)) # wrong formula expect_error(psacf(wlddev, PCGDP2 ~ iso3c, ~ year, plot = FALSE)) expect_error(psacf(wlddev, PCGDP ~ iso3c2, ~ year, plot = FALSE)) expect_error(psacf(wlddev, PCGDP ~ iso3c, ~ year2, plot = FALSE)) expect_error(psacf(wlddev, PCGDP + LIFEEX + GINI + ODA + bla ~ iso3c, ~ year, plot = FALSE)) expect_error(psacf(wlddev, PCGDP + LIFEEX + GINI + ODA ~ iso3c + bla, ~ year, plot = FALSE)) expect_error(psacf(wlddev, PCGDP + LIFEEX + GINI + ODA ~ iso3c, ~ year + bla, plot = FALSE)) # without year expect_error(psacf(wlddev, PCGDP2 ~ iso3c, plot = FALSE)) expect_error(psacf(wlddev, PCGDP ~ iso3c2, plot = FALSE)) expect_error(psacf(wlddev, PCGDP + LIFEEX + GINI + ODA + bla ~ iso3c, ~ year, plot = FALSE)) expect_error(psacf(wlddev, PCGDP + LIFEEX + GINI + ODA ~ iso3c + bla, ~ year, plot = FALSE)) # cols expect_error(psacf(wlddev, ~ iso3c, ~ year, cols = 14, plot = FALSE)) expect_error(psacf(wlddev, ~ iso3c, ~ year, cols = "bla", plot = FALSE)) expect_visible(psacf(wlddev, ~ iso3c, ~ year, cols = sapply(wlddev, is.numeric), plot = FALSE)) expect_error(psacf(wlddev, ~ iso3c, ~ year, cols = sapply(wlddev, is.numeric)[-1], plot = FALSE)) }) options(warn = 1) collapse/tests/testthat/test-roworder-colorder-rename.R0000644000176200001440000000770514166304677023124 0ustar liggesuserscontext("roworder, colorder, frename") test_that("roworder works as intended", { expect_identical(roworder(mtcars, cyl, -hp), mtcars[with(mtcars, order(cyl, -hp)), ]) expect_identical(roworder(airquality, Month, -Ozone), setRownames(airquality[with(airquality, order(Month, -Ozone)), ])) expect_identical(fnrow(roworder(airquality, Month, -Ozone, na.last = NA)), 116L) # Removes the missing values in Ozone ## Same in standard evaluation expect_identical(roworderv(airquality, c("Month", "Ozone"), decreasing = c(FALSE, TRUE)), roworder(airquality, Month, -Ozone)) ## Custom reordering expect_identical(roworderv(mtcars, neworder = 3:4), rbind(mtcars[3:4, ], mtcars[-(3:4), ])) # Bring rows 3 and 4 to the front expect_identical(roworderv(mtcars, neworder = 3:4, pos = "end"), rbind(mtcars[-(3:4), ], mtcars[3:4, ])) # Bring them to the end expect_identical(roworderv(mtcars, neworder = mtcars$vs == 1), rbind(mtcars[mtcars$vs == 1, ], mtcars[mtcars$vs != 1, ])) # Bring rows with vs == 1 to the top expect_identical(ss(roworderv(mtcars, neworder = c(8, 2), pos = "exchange"), c(2,8)), ss(mtcars, c(8,2))) }) if(identical(Sys.getenv("NCRAN"), "TRUE")) { library(magrittr) test_that("colorder works as intended", { expect_identical(colorder(mtcars, vs, cyl:hp, am), fselect(mtcars, vs, cyl:hp, am, return = "indices") %>% {cbind(mtcars[.], mtcars[-.])}) expect_identical(colorder(mtcars, vs, cyl:hp, am, pos = "end"), fselect(mtcars, vs, cyl:hp, am, return = "indices") %>% {cbind(mtcars[-.], mtcars[.])}) expect_identical(colorder(mtcars, vs, cyl:hp, am, pos = "exchange"), fselect(mtcars, vs, cyl:hp, am, return = "indices") %>% {`get_vars<-`(mtcars, sort(.), value = mtcars[.])}) ## Same in standard evaluation expect_identical(colorder(mtcars, vs, cyl:hp, am), colorderv(mtcars, c(8, 2:4, 9))) expect_identical(colorder(mtcars, vs, cyl:hp, am, pos = "end"), colorderv(mtcars, c(8, 2:4, 9), pos = "end")) expect_identical(colorder(mtcars, vs, cyl:hp, am, pos = "exchange"), colorderv(mtcars, c(8, 2:4, 9), pos = "exchange")) expect_identical(colorder(mtcars, vs, cyl, am), colorderv(mtcars, c("vs", "cyl|am"), regex = TRUE)) }) } test_that("frename works as intended", { ## Using tagged expressions expect_equal(frename(iris, Sepal.Length = SL, Sepal.Width = SW, Petal.Length = PL, Petal.Width = PW), setNames(iris, .c(SL, SW, PL, PW, Species))) expect_equal(frename(iris, Sepal.Length = "S L", Sepal.Width = "S W", Petal.Length = "P L", Petal.Width = "P W"), setNames(iris, c("S L", "S W", "P L", "P W", "Species"))) ## Using a function expect_equal(frename(iris, tolower), setNames(iris, tolower(names(iris)))) expect_equal(frename(iris, tolower, cols = 1:2), setNames(iris, c(tolower(names(iris)[1:2]), names(iris)[-(1:2)]))) expect_equal(frename(iris, tolower, cols = is.numeric), setNames(iris, c(tolower(names(iris)[1:4]), names(iris)[-(1:4)]))) expect_equal(frename(iris, paste, "new", sep = "_", cols = 1:2), setNames(iris, c(paste(names(iris)[1:2], "new", sep = "_"), names(iris)[-(1:2)]))) ## Renaming by reference iris2 <- data.table::copy(iris) setrename(iris2, tolower) expect_equal(iris2, setNames(iris, tolower(names(iris)))) iris2 <- data.table::copy(iris) setrename(iris2, tolower, cols = 1:2) expect_equal(iris2, setNames(iris, c(tolower(names(iris)[1:2]), names(iris)[-(1:2)]))) iris2 <- data.table::copy(iris) setrename(iris2, tolower, cols = is.numeric) expect_equal(iris2, setNames(iris, c(tolower(names(iris)[1:4]), names(iris)[-(1:4)]))) iris2 <- data.table::copy(iris) setrename(iris2, paste, "new", sep = "_", cols = 1:2) expect_equal(iris2, setNames(iris, c(paste(names(iris)[1:2], "new", sep = "_"), names(iris)[-(1:2)]))) rm(iris2) }) collapse/tests/testthat/test-fvar-fsd.R0000644000176200001440000016556014167361174017715 0ustar liggesuserscontext("fvar and fsd") bvar <- stats::var bsd <- stats::sd bsum <- base::sum # rm(list = ls()) set.seed(101) x <- rnorm(100) w <- abs(100*rnorm(100)) wdat <- abs(100*rnorm(32)) xNA <- x wNA <- w wdatNA <- wdat xNA[sample.int(100,20)] <- NA wNA[sample.int(100,20)] <- NA wdatNA[sample.int(32, 5)] <- NA f <- as.factor(sample.int(10, 100, TRUE)) g <- GRP(mtcars, ~ cyl + vs + am) gf <- as_factor_GRP(g) mtcNA <- na_insert(mtcars) mtcNA[27,1] <- NA # single group NA !! m <- as.matrix(mtcars) mNA <- as.matrix(mtcNA) mNAc <- mNA storage.mode(mNAc) <- "character" na20 <- function(x) { x[is.na(x)] <- 0 x } # This is correct, including Bessels correction. wvar <- function(x, w, na.rm = FALSE) { if(na.rm) { cc <- complete.cases(x, w) x <- x[cc] # if(length(x) < 2L) return(NA_real_) w <- w[cc] } # else if(length(x) < 2L) return(if(is.na(x)) NA_real_ else 0) bsum(w*(x-weighted.mean(x,w))^2)/(bsum(w)-1) } wBY <- function(x, f, FUN, w, ...) { if(is.atomic(x) && !is.array(x)) return(mapply(FUN, split(x, f), split(w, f), ...)) wspl <- split(w, f) if(is.atomic(x)) return(dapply(x, function(xi) mapply(FUN, split(xi, f), wspl, ...))) qDF(dapply(x, function(xi) mapply(FUN, split(xi, f), wspl, ...), return = "matrix")) } # fvar using Welford's Algoritm (default) test_that("fvar performs like base::var", { expect_equal(fvar(NA), bvar(NA)) expect_equal(fvar(NA, na.rm = FALSE), bvar(NA)) expect_equal(fvar(1), bvar(1, na.rm = TRUE)) expect_equal(fvar(1:3), bvar(1:3, na.rm = TRUE)) expect_equal(fvar(-1:1), bvar(-1:1, na.rm = TRUE)) expect_equal(fvar(1, na.rm = FALSE), bvar(1)) expect_equal(fvar(1:3, na.rm = FALSE), bvar(1:3)) expect_equal(fvar(-1:1, na.rm = FALSE), bvar(-1:1)) expect_equal(fvar(x), bvar(x, na.rm = TRUE)) expect_equal(fvar(x, na.rm = FALSE), bvar(x)) expect_equal(fvar(xNA, na.rm = FALSE), bvar(xNA)) expect_equal(fvar(xNA), bvar(xNA, na.rm = TRUE)) expect_equal(fvar(mtcars), fvar(m)) expect_equal(fvar(m), dapply(m, bvar, na.rm = TRUE)) expect_equal(fvar(m, na.rm = FALSE), dapply(m, bvar)) expect_equal(fvar(mNA, na.rm = FALSE), dapply(mNA, bvar)) expect_equal(fvar(mNA), dapply(mNA, bvar, na.rm = TRUE)) expect_equal(fvar(mtcars), dapply(mtcars, bvar, na.rm = TRUE)) expect_equal(fvar(mtcars, na.rm = FALSE), dapply(mtcars, bvar)) expect_equal(fvar(mtcNA, na.rm = FALSE), dapply(mtcNA, bvar)) expect_equal(fvar(mtcNA), dapply(mtcNA, bvar, na.rm = TRUE)) expect_equal(fvar(x, f), BY(x, f, bvar, na.rm = TRUE)) expect_equal(fvar(x, f, na.rm = FALSE), BY(x, f, bvar)) expect_equal(fvar(xNA, f, na.rm = FALSE), BY(xNA, f, bvar)) expect_equal(fvar(xNA, f), BY(xNA, f, bvar, na.rm = TRUE)) expect_equal(fvar(m, g), BY(m, g, bvar, na.rm = TRUE)) expect_equal(fvar(m, g, na.rm = FALSE), BY(m, g, bvar)) expect_equal(fvar(mNA, g, na.rm = FALSE), BY(mNA, g, bvar)) expect_equal(fvar(mNA, g), BY(mNA, g, bvar, na.rm = TRUE)) expect_equal(fvar(mtcars, g), BY(mtcars, g, bvar, na.rm = TRUE)) expect_equal(fvar(mtcars, g, na.rm = FALSE), BY(mtcars, g, bvar)) expect_equal(fvar(mtcNA, g, na.rm = FALSE), BY(mtcNA, g, bvar)) expect_equal(fvar(mtcNA, g), BY(mtcNA, g, bvar, na.rm = TRUE)) }) test_that("fvar with weights performs as intended (unbiased)", { expect_equal(fvar(c(2,2,4,5,5,5)), fvar(c(2,4,5), w = c(2,1,3))) expect_equal(fvar(c(2,2,4,5,5,5), na.rm = FALSE), fvar(c(2,4,5), w = c(2,1,3), na.rm = FALSE)) expect_equal(fvar(c(2.456,2.456,4.123,5.009,5.009,5.009)), fvar(c(2.456,4.123,5.009), w = c(2,1,3))) expect_equal(fvar(c(2.456,2.456,4.123,5.009,5.009,5.009), na.rm = FALSE), fvar(c(2.456,4.123,5.009), w = c(2,1,3), na.rm = FALSE)) expect_equal(fvar(c(2,2,NA,5,5,5)), fvar(c(2,NA,5), w = c(2,1,3))) expect_equal(fvar(c(2,2,NA,5,5,5), na.rm = FALSE), fvar(c(2,NA,5), w = c(2,1,3), na.rm = FALSE)) expect_equal(fvar(c(2,2,NA,5,5,5)), fvar(c(2,4,5), w = c(2,NA,3))) expect_equal(fvar(c(2,2,NA,5,5,5), na.rm = FALSE), fvar(c(2,4,5), w = c(2,NA,3), na.rm = FALSE)) expect_equal(fvar(c(NA,NA,4.123,5.009,5.009,5.009)), fvar(c(NA,4.123,5.009), w = c(2,1,3))) expect_equal(fvar(c(NA,NA,4.123,5.009,5.009,5.009), na.rm = FALSE), fvar(c(NA,4.123,5.009), w = c(2,1,3), na.rm = FALSE)) expect_equal(fvar(c(NA,NA,4.123,5.009,5.009,5.009)), fvar(c(2.456,4.123,5.009), w = c(NA,1,3))) expect_equal(fvar(c(NA,NA,4.123,5.009,5.009,5.009), na.rm = FALSE), fvar(c(2.456,4.123,5.009), w = c(NA,1,3), na.rm = FALSE)) f <- as.factor(rep(1:2, each = 6)); fs <- as.factor(rep(1:2, each = 3)) v <- c(2,2,4,5,5,5,2,2,4,5,5,5); vs <- c(2,4,5,2,4,5); w <- c(2,1,3,2,1,3) v2 <- c(2.456,2.456,4.123,5.009,5.009,5.009,2.456,2.456,4.123,5.009,5.009,5.009); v2s <- c(2.456,4.123,5.009,2.456,4.123,5.009) expect_equal(fvar(v, f), fvar(vs, fs, w)) expect_equal(fvar(v, f, na.rm = FALSE), fvar(vs, fs, w, na.rm = FALSE)) expect_equal(fvar(v2, f), fvar(v2s, fs, w)) expect_equal(fvar(v2, f, na.rm = FALSE), fvar(v2s, fs, w, na.rm = FALSE)) v[c(3,9)] <- NA; vs[c(2,5)] <- NA expect_equal(fvar(v, f), fvar(vs, fs, w)) expect_equal(fvar(v, f, na.rm = FALSE), fvar(vs, fs, w, na.rm = FALSE)) vs[c(2,5)] <- 4; w[c(2,5)] <- NA expect_equal(fvar(v, f), fvar(vs, fs, w)) expect_equal(fvar(v, f, na.rm = FALSE), fvar(vs, fs, w, na.rm = FALSE)) w[c(2,5)] <- 1; v2[c(1:2,7:8)] <- NA; v2s[c(1,4)] <- NA expect_equal(fvar(v2, f), fvar(v2s, fs, w)) expect_equal(fvar(v2, f, na.rm = FALSE), fvar(v2s, fs, w, na.rm = FALSE)) v2s[c(1,4)] <- 2.456; w[c(1,4)] <- NA expect_equal(fvar(v2, f), fvar(v2s, fs, w)) expect_equal(fvar(v2, f, na.rm = FALSE), fvar(v2s, fs, w, na.rm = FALSE)) }) test_that("fvar performs like fvar with unit weights", { expect_equal(fvar(NA), fvar(NA, w = 1)) expect_equal(fvar(NA, na.rm = FALSE), fvar(NA, w = 1, na.rm = FALSE)) expect_equal(fvar(1), fvar(1, w = 1)) expect_equal(fvar(1:3), fvar(1:3, w = rep(1,3))) expect_equal(fvar(-1:1), fvar(-1:1, w = rep(1,3))) expect_equal(fvar(1, na.rm = FALSE), fvar(1, w = 1, na.rm = FALSE)) expect_equal(fvar(1:3, na.rm = FALSE), fvar(1:3, w = rep(1, 3), na.rm = FALSE)) expect_equal(fvar(-1:1, na.rm = FALSE), fvar(-1:1, w = rep(1, 3), na.rm = FALSE)) expect_equal(fvar(x), fvar(x, w = rep(1,100))) expect_equal(fvar(x, na.rm = FALSE), fvar(x, w = rep(1, 100), na.rm = FALSE)) expect_equal(fvar(xNA, na.rm = FALSE), fvar(xNA, w = rep(1, 100), na.rm = FALSE)) expect_equal(fvar(xNA), fvar(xNA, w = rep(1, 100))) expect_equal(fvar(m), fvar(m, w = rep(1, 32))) expect_equal(fvar(m, na.rm = FALSE), fvar(m, w = rep(1, 32), na.rm = FALSE)) expect_equal(fvar(mNA, na.rm = FALSE), fvar(mNA, w = rep(1, 32), na.rm = FALSE)) expect_equal(fvar(mNA), fvar(mNA, w = rep(1, 32))) expect_equal(fvar(mtcars), fvar(mtcars, w = rep(1, 32))) expect_equal(fvar(mtcars, na.rm = FALSE), fvar(mtcars, w = rep(1, 32), na.rm = FALSE)) expect_equal(fvar(mtcNA, na.rm = FALSE), fvar(mtcNA, w = rep(1, 32), na.rm = FALSE)) expect_equal(fvar(mtcNA), fvar(mtcNA, w = rep(1, 32))) expect_equal(fvar(x, f), fvar(x, f, rep(1,100))) expect_equal(fvar(x, f, na.rm = FALSE), fvar(x, f, rep(1,100), na.rm = FALSE)) expect_equal(fvar(xNA, f, na.rm = FALSE), fvar(xNA, f, rep(1,100), na.rm = FALSE)) expect_equal(fvar(xNA, f), fvar(xNA, f, rep(1,100))) expect_equal(fvar(m, g), fvar(m, g, rep(1,32))) expect_equal(fvar(m, g, na.rm = FALSE), fvar(m, g, rep(1,32), na.rm = FALSE)) expect_equal(fvar(mNA, g, na.rm = FALSE), fvar(mNA, g, rep(1,32), na.rm = FALSE)) expect_equal(fvar(mNA, g), fvar(mNA, g, rep(1,32))) expect_equal(fvar(mtcars, g), fvar(mtcars, g, rep(1,32))) expect_equal(fvar(mtcars, g, na.rm = FALSE), fvar(mtcars, g, rep(1,32), na.rm = FALSE)) expect_equal(fvar(mtcNA, g, na.rm = FALSE), fvar(mtcNA, g, rep(1,32), na.rm = FALSE)) expect_equal(fvar(mtcNA, g), fvar(mtcNA, g, rep(1,32))) }) test_that("fvar with weights performs like wvar (defined above)", { # complete weights expect_equal(fvar(NA, w = 1), wvar(NA, 1)) expect_equal(fvar(NA, w = 1, na.rm = FALSE), wvar(NA, 1)) expect_equal(fvar(1, w = 1), wvar(1, w = 1)) expect_equal(fvar(1:3, w = 1:3), wvar(1:3, 1:3)) expect_equal(fvar(-1:1, w = 1:3), wvar(-1:1, 1:3)) expect_equal(fvar(1, w = 1, na.rm = FALSE), wvar(1, 1)) expect_equal(fvar(1:3, w = c(0.99,3454,1.111), na.rm = FALSE), wvar(1:3, c(0.99,3454,1.111))) expect_equal(fvar(-1:1, w = 1:3, na.rm = FALSE), wvar(-1:1, 1:3)) expect_equal(fvar(x, w = w), wvar(x, w)) expect_equal(fvar(x, w = w, na.rm = FALSE), wvar(x, w)) expect_equal(fvar(xNA, w = w, na.rm = FALSE), wvar(xNA, w)) expect_equal(fvar(xNA, w = w), wvar(xNA, w, na.rm = TRUE)) expect_equal(fvar(mtcars, w = wdat), fvar(m, w = wdat)) expect_equal(fvar(m, w = wdat), dapply(m, wvar, wdat, na.rm = TRUE)) expect_equal(fvar(m, w = wdat, na.rm = FALSE), dapply(m, wvar, wdat)) expect_equal(fvar(mNA, w = wdat, na.rm = FALSE), dapply(mNA, wvar, wdat)) expect_equal(fvar(mNA, w = wdat), dapply(mNA, wvar, wdat, na.rm = TRUE)) expect_equal(fvar(mtcars, w = wdat), dapply(mtcars, wvar, wdat, na.rm = TRUE)) expect_equal(fvar(mtcars, w = wdat, na.rm = FALSE), dapply(mtcars, wvar, wdat)) expect_equal(fvar(mtcNA, w = wdat, na.rm = FALSE), dapply(mtcNA, wvar, wdat)) expect_equal(fvar(mtcNA, w = wdat), dapply(mtcNA, wvar, wdat, na.rm = TRUE)) expect_equal(fvar(x, f, w), wBY(x, f, wvar, w)) expect_equal(fvar(x, f, w, na.rm = FALSE), wBY(x, f, wvar, w)) expect_equal(fvar(xNA, f, w, na.rm = FALSE), wBY(xNA, f, wvar, w)) expect_equal(na20(fvar(xNA, f, w)), na20(wBY(xNA, f, wvar, w, na.rm = TRUE))) expect_equal(fvar(m, g, wdat), wBY(m, gf, wvar, wdat)) expect_equal(fvar(m, g, wdat, na.rm = FALSE), wBY(m, gf, wvar, wdat)) expect_equal(fvar(mNA, g, wdat, na.rm = FALSE), wBY(mNA, gf, wvar, wdat)) expect_equal(na20(fvar(mNA, g, wdat)), na20(wBY(mNA, gf, wvar, wdat, na.rm = TRUE))) expect_equal(fvar(mtcars, g, wdat), wBY(mtcars, gf, wvar, wdat)) expect_equal(fvar(mtcars, g, wdat, na.rm = FALSE), wBY(mtcars, gf, wvar, wdat)) expect_equal(fvar(mtcNA, g, wdat, na.rm = FALSE), wBY(mtcNA, gf, wvar, wdat)) expect_equal(na20(fvar(mtcNA, g, wdat)), na20(wBY(mtcNA, gf, wvar, wdat, na.rm = TRUE))) # missing weights expect_equal(fvar(NA, w = NA), wvar(NA, NA)) expect_equal(fvar(NA, w = NA, na.rm = FALSE), wvar(NA, NA)) expect_equal(fvar(1, w = NA), wvar(1, w = NA)) expect_equal(fvar(1:3, w = c(NA,1:2)), wvar(1:3, c(NA,1:2), na.rm = TRUE)) expect_equal(fvar(-1:1, w = c(NA,1:2)), wvar(-1:1, c(NA,1:2), na.rm = TRUE)) expect_equal(fvar(1, w = NA, na.rm = FALSE), wvar(1, NA)) expect_equal(fvar(1:3, w = c(NA,1:2), na.rm = FALSE), wvar(1:3, c(NA,1:2))) expect_equal(fvar(-1:1, w = c(NA,1:2), na.rm = FALSE), wvar(-1:1, c(NA,1:2))) expect_equal(fvar(x, w = wNA), wvar(x, wNA, na.rm = TRUE)) expect_equal(fvar(x, w = wNA, na.rm = FALSE), wvar(x, wNA)) expect_equal(fvar(xNA, w = wNA, na.rm = FALSE), wvar(xNA, wNA)) expect_equal(fvar(xNA, w = wNA), wvar(xNA, wNA, na.rm = TRUE)) expect_equal(fvar(mtcars, w = wdatNA), fvar(m, w = wdatNA)) expect_equal(fvar(m, w = wdatNA), dapply(m, wvar, wdatNA, na.rm = TRUE)) expect_equal(fvar(m, w = wdatNA, na.rm = FALSE), dapply(m, wvar, wdatNA)) expect_equal(fvar(mNA, w = wdatNA, na.rm = FALSE), dapply(mNA, wvar, wdatNA)) expect_equal(fvar(mNA, w = wdatNA), dapply(mNA, wvar, wdatNA, na.rm = TRUE)) expect_equal(fvar(mtcars, w = wdatNA), dapply(mtcars, wvar, wdatNA, na.rm = TRUE)) expect_equal(fvar(mtcars, w = wdatNA, na.rm = FALSE), dapply(mtcars, wvar, wdatNA)) expect_equal(fvar(mtcNA, w = wdatNA, na.rm = FALSE), dapply(mtcNA, wvar, wdatNA)) expect_equal(fvar(mtcNA, w = wdatNA), dapply(mtcNA, wvar, wdatNA, na.rm = TRUE)) expect_equal(na20(fvar(x, f, wNA)), na20(wBY(x, f, wvar, wNA, na.rm = TRUE))) expect_equal(fvar(x, f, wNA, na.rm = FALSE), wBY(x, f, wvar, wNA)) expect_equal(fvar(xNA, f, wNA, na.rm = FALSE), wBY(xNA, f, wvar, wNA)) expect_equal(na20(fvar(xNA, f, wNA)), na20(wBY(xNA, f, wvar, wNA, na.rm = TRUE))) expect_equal(na20(fvar(m, g, wdatNA)), na20(wBY(m, gf, wvar, wdatNA, na.rm = TRUE))) expect_equal(fvar(m, g, wdatNA, na.rm = FALSE), wBY(m, gf, wvar, wdatNA)) expect_equal(fvar(mNA, g, wdatNA, na.rm = FALSE), wBY(mNA, gf, wvar, wdatNA)) expect_equal(na20(fvar(mNA, g, wdatNA)), na20(wBY(mNA, gf, wvar, wdatNA, na.rm = TRUE))) expect_equal(na20(fvar(mtcars, g, wdatNA)), na20(wBY(mtcars, gf, wvar, wdatNA, na.rm = TRUE))) expect_equal(fvar(mtcars, g, wdatNA, na.rm = FALSE), wBY(mtcars, gf, wvar, wdatNA)) expect_equal(fvar(mtcNA, g, wdatNA, na.rm = FALSE), wBY(mtcNA, gf, wvar, wdatNA)) expect_equal(na20(fvar(mtcNA, g, wdatNA)), na20(wBY(mtcNA, gf, wvar, wdatNA, na.rm = TRUE))) }) test_that("fvar performs numerically stable", { expect_true(all_obj_equal(replicate(50, fvar(1), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(NA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(NA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(x), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(x, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(xNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(xNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(m), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(m, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mtcars), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mtcars, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mtcNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mtcNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(x, f), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(x, f, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(xNA, f, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(xNA, f), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(m, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(m, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mNA, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mNA, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mtcars, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mtcars, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mtcNA, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mtcNA, g), simplify = FALSE))) }) test_that("fvar with complete weights performs numerically stable", { expect_true(all_obj_equal(replicate(50, fvar(1, w = 1), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(NA, w = 1), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(NA, w = 1, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(x, w = w), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(x, w = w, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(xNA, w = w, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(xNA, w = w), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(m, w = wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(m, w = wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mNA, w = wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mNA, w = wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mtcars, w = wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mtcars, w = wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mtcNA, w = wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mtcNA, w = wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(x, f, w), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(x, f, w, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(xNA, f, w, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(xNA, f, w), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(m, g, wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(m, g, wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mNA, g, wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mNA, g, wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mtcars, g, wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mtcars, g, wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mtcNA, g, wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mtcNA, g, wdat), simplify = FALSE))) }) test_that("fvar with missing weights performs numerically stable", { expect_true(all_obj_equal(replicate(50, fvar(1, w = NA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(NA, w = NA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(NA, w = NA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(x, w = wNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(x, w = wNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(xNA, w = wNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(xNA, w = wNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(m, w = wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(m, w = wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mNA, w = wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mNA, w = wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mtcars, w = wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mtcars, w = wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mtcNA, w = wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mtcNA, w = wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(x, f, wNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(x, f, wNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(xNA, f, wNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(xNA, f, wNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(m, g, wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(m, g, wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mNA, g, wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mNA, g, wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mtcars, g, wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mtcars, g, wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mtcNA, g, wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mtcNA, g, wdatNA), simplify = FALSE))) }) test_that("fvar handles special values in the right way", { expect_equal(fvar(NA), NA_real_) expect_equal(fvar(NaN), NA_real_) expect_equal(fvar(Inf), NA_real_) expect_equal(fvar(-Inf), NA_real_) expect_equal(fvar(TRUE), NA_real_) expect_equal(fvar(FALSE), NA_real_) expect_equal(fvar(NA, na.rm = FALSE), NA_real_) expect_equal(fvar(NaN, na.rm = FALSE), NA_real_) expect_equal(fvar(Inf, na.rm = FALSE), NA_real_) expect_equal(fvar(-Inf, na.rm = FALSE), NA_real_) expect_equal(fvar(TRUE, na.rm = FALSE), NA_real_) expect_equal(fvar(FALSE, na.rm = FALSE), NA_real_) expect_equal(fvar(c(1,NA)), NA_real_) expect_equal(fvar(c(1,NaN)), NA_real_) expect_equal(fvar(c(1,Inf)), NA_real_) expect_equal(fvar(c(1,-Inf)), NA_real_) expect_equal(fvar(c(FALSE,TRUE)), 0.5) expect_equal(fvar(c(FALSE,FALSE)), 0) expect_equal(fvar(c(1,Inf), na.rm = FALSE), NA_real_) expect_equal(fvar(c(1,-Inf), na.rm = FALSE), NA_real_) expect_equal(fvar(c(FALSE,TRUE), na.rm = FALSE), 0.5) expect_equal(fvar(c(FALSE,FALSE), na.rm = FALSE), 0) }) test_that("fvar with weights handles special values in the right way", { expect_equal(fvar(NA, w = 1), NA_real_) expect_equal(fvar(NaN, w = 1), NA_real_) expect_equal(fvar(Inf, w = 1), NA_real_) expect_equal(fvar(-Inf, w = 1), NA_real_) expect_equal(fvar(TRUE, w = 1), NA_real_) expect_equal(fvar(FALSE, w = 1), NA_real_) expect_equal(fvar(NA, w = 1, na.rm = FALSE), NA_real_) expect_equal(fvar(NaN, w = 1, na.rm = FALSE), NA_real_) expect_equal(fvar(Inf, w = 1, na.rm = FALSE), NA_real_) expect_equal(fvar(-Inf, w = 1, na.rm = FALSE), NA_real_) expect_equal(fvar(TRUE, w = 1, na.rm = FALSE), NA_real_) expect_equal(fvar(FALSE, w = 1, na.rm = FALSE), NA_real_) expect_equal(fvar(NA, w = NA), NA_real_) expect_equal(fvar(NaN, w = NA), NA_real_) expect_equal(fvar(Inf, w = NA), NA_real_) expect_equal(fvar(-Inf, w = NA), NA_real_) expect_equal(fvar(TRUE, w = NA), NA_real_) expect_equal(fvar(FALSE, w = NA), NA_real_) expect_equal(fvar(NA, w = NA, na.rm = FALSE), NA_real_) expect_equal(fvar(NaN, w = NA, na.rm = FALSE), NA_real_) expect_equal(fvar(Inf, w = NA, na.rm = FALSE), NA_real_) expect_equal(fvar(-Inf, w = NA, na.rm = FALSE), NA_real_) expect_equal(fvar(TRUE, w = NA, na.rm = FALSE), NA_real_) expect_equal(fvar(FALSE, w = NA, na.rm = FALSE), NA_real_) expect_equal(fvar(1:3, w = c(1,Inf,3)), NA_real_) expect_equal(fvar(1:3, w = c(1,-Inf,3)), NA_real_) expect_equal(fvar(1:3, w = c(1,Inf,3), na.rm = FALSE), NA_real_) expect_equal(fvar(1:3, w = c(1,-Inf,3), na.rm = FALSE), NA_real_) }) test_that("fvar produces errors for wrong input", { expect_error(fvar("a")) expect_error(fvar(NA_character_)) expect_error(fvar(mNAc)) expect_error(fvar(mNAc, f)) expect_error(fvar(1:2,1:3)) expect_error(fvar(m,1:31)) expect_error(fvar(mtcars,1:31)) expect_error(fvar(mtcars, w = 1:31)) expect_error(fvar("a", w = 1)) expect_error(fvar(1:2, w = 1:3)) expect_error(fvar(NA_character_, w = 1)) expect_error(fvar(mNAc, w = wdat)) expect_error(fvar(mNAc, f, wdat)) expect_error(fvar(mNA, w = 1:33)) expect_error(fvar(1:2,1:2, 1:3)) expect_error(fvar(m,1:32,1:20)) expect_error(fvar(mtcars,1:32,1:10)) expect_error(fvar(1:2, w = c("a","b"))) expect_error(fvar(wlddev)) expect_error(fvar(wlddev, w = wlddev$year)) expect_error(fvar(wlddev, wlddev$iso3c)) expect_error(fvar(wlddev, wlddev$iso3c, wlddev$year)) }) # Repeating all tests for the other algorithm test_that("fvar with direct algorithm performs like base::var", { expect_equal(fvar(NA, stable.algo = FALSE), bvar(NA)) expect_equal(fvar(NA, na.rm = FALSE, stable.algo = FALSE), bvar(NA)) expect_equal(fvar(1, stable.algo = FALSE), bvar(1, na.rm = TRUE)) expect_equal(fvar(1:3, stable.algo = FALSE), bvar(1:3, na.rm = TRUE)) expect_equal(fvar(-1:1, stable.algo = FALSE), bvar(-1:1, na.rm = TRUE)) expect_equal(fvar(1, na.rm = FALSE, stable.algo = FALSE), bvar(1)) expect_equal(fvar(1:3, na.rm = FALSE, stable.algo = FALSE), bvar(1:3)) expect_equal(fvar(-1:1, na.rm = FALSE, stable.algo = FALSE), bvar(-1:1)) expect_equal(fvar(x, stable.algo = FALSE), bvar(x, na.rm = TRUE)) expect_equal(fvar(x, na.rm = FALSE, stable.algo = FALSE), bvar(x)) expect_equal(fvar(xNA, na.rm = FALSE, stable.algo = FALSE), bvar(xNA)) expect_equal(fvar(xNA, stable.algo = FALSE), bvar(xNA, na.rm = TRUE)) expect_equal(fvar(mtcars, stable.algo = FALSE), fvar(m)) expect_equal(fvar(m, stable.algo = FALSE), dapply(m, bvar, na.rm = TRUE)) expect_equal(fvar(m, na.rm = FALSE, stable.algo = FALSE), dapply(m, bvar)) expect_equal(fvar(mNA, na.rm = FALSE, stable.algo = FALSE), dapply(mNA, bvar)) expect_equal(fvar(mNA, stable.algo = FALSE), dapply(mNA, bvar, na.rm = TRUE)) expect_equal(fvar(mtcars, stable.algo = FALSE), dapply(mtcars, bvar, na.rm = TRUE)) expect_equal(fvar(mtcars, na.rm = FALSE, stable.algo = FALSE), dapply(mtcars, bvar)) expect_equal(fvar(mtcNA, na.rm = FALSE, stable.algo = FALSE), dapply(mtcNA, bvar)) expect_equal(fvar(mtcNA, stable.algo = FALSE), dapply(mtcNA, bvar, na.rm = TRUE)) expect_equal(fvar(x, f, stable.algo = FALSE), BY(x, f, bvar, na.rm = TRUE)) expect_equal(fvar(x, f, na.rm = FALSE, stable.algo = FALSE), BY(x, f, bvar)) expect_equal(fvar(xNA, f, na.rm = FALSE, stable.algo = FALSE), BY(xNA, f, bvar)) expect_equal(fvar(xNA, f, stable.algo = FALSE), BY(xNA, f, bvar, na.rm = TRUE)) # failed? # expect_equal(fvar(m, g, stable.algo = FALSE), BY(m, g, bvar, na.rm = TRUE)) # failed on arch i386 CMD check and patched-solaris-x86 # expect_equal(fvar(m, g, na.rm = FALSE, stable.algo = FALSE), BY(m, g, bvar)) # failed on arch i386 CMD check and patched-solaris-x86 # expect_equal(fvar(mNA, g, na.rm = FALSE, stable.algo = FALSE), BY(mNA, g, bvar)) # failed on arch i386 CMD check and patched-solaris-x86 # expect_equal(fvar(mNA, g, stable.algo = FALSE), BY(mNA, g, bvar, na.rm = TRUE)) # failed on arch i386 CMD check and patched-solaris-x86 # expect_equal(fvar(mtcars, g, stable.algo = FALSE), BY(mtcars, g, bvar, na.rm = TRUE)) # failed on arch i386 CMD check and patched-solaris-x86 # expect_equal(fvar(mtcars, g, na.rm = FALSE, stable.algo = FALSE), BY(mtcars, g, bvar)) # failed on arch i386 CMD check and patched-solaris-x86 # expect_equal(fvar(mtcNA, g, na.rm = FALSE, stable.algo = FALSE), BY(mtcNA, g, bvar)) # failed on arch i386 CMD check and patched-solaris-x86 # expect_equal(fvar(mtcNA, g, stable.algo = FALSE), BY(mtcNA, g, bvar, na.rm = TRUE)) # failed on arch i386 CMD check and patched-solaris-x86 }) test_that("fvar with with direct algorithm and weights performs as intended (unbiased)", { expect_equal(fvar(c(2,2,4,5,5,5), stable.algo = FALSE), fvar(c(2,4,5), w = c(2,1,3), stable.algo = FALSE)) expect_equal(fvar(c(2,2,4,5,5,5), na.rm = FALSE, stable.algo = FALSE), fvar(c(2,4,5), w = c(2,1,3), na.rm = FALSE), stable.algo = FALSE) expect_equal(fvar(c(2.456,2.456,4.123,5.009,5.009,5.009), stable.algo = FALSE), fvar(c(2.456,4.123,5.009), w = c(2,1,3), stable.algo = FALSE)) expect_equal(fvar(c(2.456,2.456,4.123,5.009,5.009,5.009), na.rm = FALSE, stable.algo = FALSE), fvar(c(2.456,4.123,5.009), w = c(2,1,3), na.rm = FALSE, stable.algo = FALSE)) expect_equal(fvar(c(2,2,NA,5,5,5), stable.algo = FALSE), fvar(c(2,NA,5), w = c(2,1,3), stable.algo = FALSE)) expect_equal(fvar(c(2,2,NA,5,5,5), na.rm = FALSE, stable.algo = FALSE), fvar(c(2,NA,5), w = c(2,1,3), na.rm = FALSE, stable.algo = FALSE)) expect_equal(fvar(c(2,2,NA,5,5,5), stable.algo = FALSE), fvar(c(2,4,5), w = c(2,NA,3), stable.algo = FALSE)) expect_equal(fvar(c(2,2,NA,5,5,5), na.rm = FALSE, stable.algo = FALSE), fvar(c(2,4,5), w = c(2,NA,3), na.rm = FALSE, stable.algo = FALSE)) expect_equal(fvar(c(NA,NA,4.123,5.009,5.009,5.009), stable.algo = FALSE), fvar(c(NA,4.123,5.009), w = c(2,1,3), stable.algo = FALSE)) expect_equal(fvar(c(NA,NA,4.123,5.009,5.009,5.009), na.rm = FALSE, stable.algo = FALSE), fvar(c(NA,4.123,5.009), w = c(2,1,3), na.rm = FALSE, stable.algo = FALSE)) expect_equal(fvar(c(NA,NA,4.123,5.009,5.009,5.009), stable.algo = FALSE), fvar(c(2.456,4.123,5.009), w = c(NA,1,3), stable.algo = FALSE)) expect_equal(fvar(c(NA,NA,4.123,5.009,5.009,5.009), na.rm = FALSE, stable.algo = FALSE), fvar(c(2.456,4.123,5.009), w = c(NA,1,3), na.rm = FALSE, stable.algo = FALSE)) f <- as.factor(rep(1:2, each = 6)); fs <- as.factor(rep(1:2, each = 3)) v <- c(2,2,4,5,5,5,2,2,4,5,5,5); vs <- c(2,4,5,2,4,5); w <- c(2,1,3,2,1,3) v2 <- c(2.456,2.456,4.123,5.009,5.009,5.009,2.456,2.456,4.123,5.009,5.009,5.009); v2s <- c(2.456,4.123,5.009,2.456,4.123,5.009) expect_equal(fvar(v, f, stable.algo = FALSE), fvar(vs, fs, w, stable.algo = FALSE)) expect_equal(fvar(v, f, na.rm = FALSE, stable.algo = FALSE), fvar(vs, fs, w, na.rm = FALSE, stable.algo = FALSE)) expect_equal(fvar(v2, f, stable.algo = FALSE), fvar(v2s, fs, w, stable.algo = FALSE)) expect_equal(fvar(v2, f, na.rm = FALSE, stable.algo = FALSE), fvar(v2s, fs, w, na.rm = FALSE, stable.algo = FALSE)) v[c(3,9)] <- NA; vs[c(2,5)] <- NA expect_equal(fvar(v, f, stable.algo = FALSE), fvar(vs, fs, w, stable.algo = FALSE)) expect_equal(fvar(v, f, na.rm = FALSE, stable.algo = FALSE), fvar(vs, fs, w, na.rm = FALSE, stable.algo = FALSE)) vs[c(2,5)] <- 4; w[c(2,5)] <- NA expect_equal(fvar(v, f, stable.algo = FALSE), fvar(vs, fs, w, stable.algo = FALSE)) expect_equal(fvar(v, f, na.rm = FALSE, stable.algo = FALSE), fvar(vs, fs, w, na.rm = FALSE, stable.algo = FALSE)) w[c(2,5)] <- 1; v2[c(1:2,7:8)] <- NA; v2s[c(1,4)] <- NA expect_equal(fvar(v2, f, stable.algo = FALSE), fvar(v2s, fs, w, stable.algo = FALSE)) expect_equal(fvar(v2, f, na.rm = FALSE, stable.algo = FALSE), fvar(v2s, fs, w, na.rm = FALSE, stable.algo = FALSE)) v2s[c(1,4)] <- 2.456; w[c(1,4)] <- NA expect_equal(fvar(v2, f, stable.algo = FALSE), fvar(v2s, fs, w, stable.algo = FALSE)) expect_equal(fvar(v2, f, na.rm = FALSE, stable.algo = FALSE), fvar(v2s, fs, w, na.rm = FALSE, stable.algo = FALSE)) }) test_that("fvar with direct algorithm performs like fvar with unit weights", { expect_equal(fvar(NA, stable.algo = FALSE), fvar(NA, w = 1, stable.algo = FALSE)) expect_equal(fvar(NA, na.rm = FALSE, stable.algo = FALSE), fvar(NA, w = 1, na.rm = FALSE, stable.algo = FALSE)) expect_equal(fvar(1, stable.algo = FALSE), fvar(1, w = 1, stable.algo = FALSE)) expect_equal(fvar(1:3, stable.algo = FALSE), fvar(1:3, w = rep(1,3), stable.algo = FALSE)) expect_equal(fvar(-1:1, stable.algo = FALSE), fvar(-1:1, w = rep(1,3), stable.algo = FALSE)) expect_equal(fvar(1, na.rm = FALSE, stable.algo = FALSE), fvar(1, w = 1, na.rm = FALSE, stable.algo = FALSE)) expect_equal(fvar(1:3, na.rm = FALSE, stable.algo = FALSE), fvar(1:3, w = rep(1, 3), na.rm = FALSE, stable.algo = FALSE)) expect_equal(fvar(-1:1, na.rm = FALSE, stable.algo = FALSE), fvar(-1:1, w = rep(1, 3), na.rm = FALSE, stable.algo = FALSE)) expect_equal(fvar(x, stable.algo = FALSE), fvar(x, w = rep(1,100), stable.algo = FALSE)) expect_equal(fvar(x, na.rm = FALSE, stable.algo = FALSE), fvar(x, w = rep(1, 100), na.rm = FALSE, stable.algo = FALSE)) expect_equal(fvar(xNA, na.rm = FALSE, stable.algo = FALSE), fvar(xNA, w = rep(1, 100), na.rm = FALSE, stable.algo = FALSE)) expect_equal(fvar(xNA, stable.algo = FALSE), fvar(xNA, w = rep(1, 100), stable.algo = FALSE)) expect_equal(fvar(m, stable.algo = FALSE), fvar(m, w = rep(1, 32), stable.algo = FALSE)) expect_equal(fvar(m, na.rm = FALSE, stable.algo = FALSE), fvar(m, w = rep(1, 32), na.rm = FALSE, stable.algo = FALSE)) expect_equal(fvar(mNA, na.rm = FALSE, stable.algo = FALSE), fvar(mNA, w = rep(1, 32), na.rm = FALSE, stable.algo = FALSE)) expect_equal(fvar(mNA, stable.algo = FALSE), fvar(mNA, w = rep(1, 32), stable.algo = FALSE)) expect_equal(fvar(mtcars, stable.algo = FALSE), fvar(mtcars, w = rep(1, 32), stable.algo = FALSE)) expect_equal(fvar(mtcars, na.rm = FALSE, stable.algo = FALSE), fvar(mtcars, w = rep(1, 32), na.rm = FALSE, stable.algo = FALSE)) expect_equal(fvar(mtcNA, na.rm = FALSE, stable.algo = FALSE), fvar(mtcNA, w = rep(1, 32), na.rm = FALSE, stable.algo = FALSE)) expect_equal(fvar(mtcNA, stable.algo = FALSE), fvar(mtcNA, w = rep(1, 32), stable.algo = FALSE)) expect_equal(fvar(x, f, stable.algo = FALSE), fvar(x, f, rep(1,100), stable.algo = FALSE)) expect_equal(fvar(x, f, na.rm = FALSE, stable.algo = FALSE), fvar(x, f, rep(1,100), na.rm = FALSE, stable.algo = FALSE)) expect_equal(fvar(xNA, f, na.rm = FALSE, stable.algo = FALSE), fvar(xNA, f, rep(1,100), na.rm = FALSE, stable.algo = FALSE)) expect_equal(fvar(xNA, f, stable.algo = FALSE), fvar(xNA, f, rep(1,100), stable.algo = FALSE)) expect_equal(fvar(m, g, stable.algo = FALSE), fvar(m, g, rep(1,32), stable.algo = FALSE)) expect_equal(fvar(m, g, na.rm = FALSE, stable.algo = FALSE), fvar(m, g, rep(1,32), na.rm = FALSE, stable.algo = FALSE)) expect_equal(fvar(mNA, g, na.rm = FALSE, stable.algo = FALSE), fvar(mNA, g, rep(1,32), na.rm = FALSE, stable.algo = FALSE)) expect_equal(fvar(mNA, g, stable.algo = FALSE), fvar(mNA, g, rep(1,32), stable.algo = FALSE)) expect_equal(fvar(mtcars, g, stable.algo = FALSE), fvar(mtcars, g, rep(1,32), stable.algo = FALSE)) expect_equal(fvar(mtcars, g, na.rm = FALSE, stable.algo = FALSE), fvar(mtcars, g, rep(1,32), na.rm = FALSE, stable.algo = FALSE)) expect_equal(fvar(mtcNA, g, na.rm = FALSE, stable.algo = FALSE), fvar(mtcNA, g, rep(1,32), na.rm = FALSE, stable.algo = FALSE)) expect_equal(fvar(mtcNA, g, stable.algo = FALSE), fvar(mtcNA, g, rep(1,32), stable.algo = FALSE)) }) test_that("fvar with weights performs like wvar (defined above)", { # complete weights expect_equal(fvar(NA, w = 1, stable.algo = FALSE), wvar(NA, 1)) expect_equal(fvar(NA, w = 1, na.rm = FALSE, stable.algo = FALSE), wvar(NA, 1)) expect_equal(fvar(1, w = 1, stable.algo = FALSE), wvar(1, w = 1)) expect_equal(fvar(1:3, w = 1:3, stable.algo = FALSE), wvar(1:3, 1:3)) expect_equal(fvar(-1:1, w = 1:3, stable.algo = FALSE), wvar(-1:1, 1:3)) expect_equal(fvar(1, w = 1, na.rm = FALSE, stable.algo = FALSE), wvar(1, 1)) expect_equal(fvar(1:3, w = c(0.99,3454,1.111), na.rm = FALSE, stable.algo = FALSE), wvar(1:3, c(0.99,3454,1.111))) expect_equal(fvar(-1:1, w = 1:3, na.rm = FALSE, stable.algo = FALSE), wvar(-1:1, 1:3)) expect_equal(fvar(x, w = w, stable.algo = FALSE), wvar(x, w)) expect_equal(fvar(x, w = w, na.rm = FALSE, stable.algo = FALSE), wvar(x, w)) expect_equal(fvar(xNA, w = w, na.rm = FALSE, stable.algo = FALSE), wvar(xNA, w)) expect_equal(fvar(xNA, w = w, stable.algo = FALSE), wvar(xNA, w, na.rm = TRUE)) expect_equal(fvar(mtcars, w = wdat, stable.algo = FALSE), fvar(m, w = wdat)) expect_equal(fvar(m, w = wdat, stable.algo = FALSE), dapply(m, wvar, wdat, na.rm = TRUE)) expect_equal(fvar(m, w = wdat, na.rm = FALSE, stable.algo = FALSE), dapply(m, wvar, wdat)) expect_equal(fvar(mNA, w = wdat, na.rm = FALSE, stable.algo = FALSE), dapply(mNA, wvar, wdat)) expect_equal(fvar(mNA, w = wdat, stable.algo = FALSE), dapply(mNA, wvar, wdat, na.rm = TRUE)) expect_equal(fvar(mtcars, w = wdat, stable.algo = FALSE), dapply(mtcars, wvar, wdat, na.rm = TRUE)) expect_equal(fvar(mtcars, w = wdat, na.rm = FALSE, stable.algo = FALSE), dapply(mtcars, wvar, wdat)) expect_equal(fvar(mtcNA, w = wdat, na.rm = FALSE, stable.algo = FALSE), dapply(mtcNA, wvar, wdat)) expect_equal(fvar(mtcNA, w = wdat, stable.algo = FALSE), dapply(mtcNA, wvar, wdat, na.rm = TRUE)) expect_equal(fvar(x, f, w, stable.algo = FALSE), wBY(x, f, wvar, w)) expect_equal(fvar(x, f, w, na.rm = FALSE, stable.algo = FALSE), wBY(x, f, wvar, w)) expect_equal(fvar(xNA, f, w, na.rm = FALSE, stable.algo = FALSE), wBY(xNA, f, wvar, w)) expect_equal(na20(fvar(xNA, f, w, stable.algo = FALSE)), na20(wBY(xNA, f, wvar, w, na.rm = TRUE))) expect_equal(fvar(m, g, wdat, stable.algo = FALSE), wBY(m, gf, wvar, wdat)) expect_equal(fvar(m, g, wdat, na.rm = FALSE, stable.algo = FALSE), wBY(m, gf, wvar, wdat)) expect_equal(fvar(mNA, g, wdat, na.rm = FALSE), wBY(mNA, gf, wvar, wdat)) expect_equal(na20(fvar(mNA, g, wdat, stable.algo = FALSE)), na20(wBY(mNA, gf, wvar, wdat, na.rm = TRUE))) expect_equal(fvar(mtcars, g, wdat, stable.algo = FALSE), wBY(mtcars, gf, wvar, wdat)) expect_equal(fvar(mtcars, g, wdat, na.rm = FALSE, stable.algo = FALSE), wBY(mtcars, gf, wvar, wdat)) expect_equal(fvar(mtcNA, g, wdat, na.rm = FALSE, stable.algo = FALSE), wBY(mtcNA, gf, wvar, wdat)) expect_equal(na20(fvar(mtcNA, g, wdat, stable.algo = FALSE)), na20(wBY(mtcNA, gf, wvar, wdat, na.rm = TRUE))) # missing weights expect_equal(fvar(NA, w = NA, stable.algo = FALSE), wvar(NA, NA)) expect_equal(fvar(NA, w = NA, na.rm = FALSE, stable.algo = FALSE), wvar(NA, NA)) expect_equal(fvar(1, w = NA, stable.algo = FALSE), wvar(1, w = NA)) expect_equal(fvar(1:3, w = c(NA,1:2), stable.algo = FALSE), wvar(1:3, c(NA,1:2), na.rm = TRUE)) expect_equal(fvar(-1:1, w = c(NA,1:2), stable.algo = FALSE), wvar(-1:1, c(NA,1:2), na.rm = TRUE)) expect_equal(fvar(1, w = NA, na.rm = FALSE, stable.algo = FALSE), wvar(1, NA)) expect_equal(fvar(1:3, w = c(NA,1:2), na.rm = FALSE, stable.algo = FALSE), wvar(1:3, c(NA,1:2))) expect_equal(fvar(-1:1, w = c(NA,1:2), na.rm = FALSE, stable.algo = FALSE), wvar(-1:1, c(NA,1:2))) expect_equal(fvar(x, w = wNA, stable.algo = FALSE), wvar(x, wNA, na.rm = TRUE)) expect_equal(fvar(x, w = wNA, na.rm = FALSE, stable.algo = FALSE), wvar(x, wNA)) expect_equal(fvar(xNA, w = wNA, na.rm = FALSE, stable.algo = FALSE), wvar(xNA, wNA)) expect_equal(fvar(xNA, w = wNA, stable.algo = FALSE), wvar(xNA, wNA, na.rm = TRUE)) expect_equal(fvar(mtcars, w = wdatNA, stable.algo = FALSE), fvar(m, w = wdatNA)) expect_equal(fvar(m, w = wdatNA, stable.algo = FALSE), dapply(m, wvar, wdatNA, na.rm = TRUE)) expect_equal(fvar(m, w = wdatNA, na.rm = FALSE, stable.algo = FALSE), dapply(m, wvar, wdatNA)) expect_equal(fvar(mNA, w = wdatNA, na.rm = FALSE, stable.algo = FALSE), dapply(mNA, wvar, wdatNA)) expect_equal(fvar(mNA, w = wdatNA, stable.algo = FALSE), dapply(mNA, wvar, wdatNA, na.rm = TRUE)) expect_equal(fvar(mtcars, w = wdatNA, stable.algo = FALSE), dapply(mtcars, wvar, wdatNA, na.rm = TRUE)) expect_equal(fvar(mtcars, w = wdatNA, na.rm = FALSE, stable.algo = FALSE), dapply(mtcars, wvar, wdatNA)) expect_equal(fvar(mtcNA, w = wdatNA, na.rm = FALSE, stable.algo = FALSE), dapply(mtcNA, wvar, wdatNA)) expect_equal(fvar(mtcNA, w = wdatNA, stable.algo = FALSE), dapply(mtcNA, wvar, wdatNA, na.rm = TRUE)) expect_equal(na20(fvar(x, f, wNA, stable.algo = FALSE)), na20(wBY(x, f, wvar, wNA, na.rm = TRUE))) expect_equal(fvar(x, f, wNA, na.rm = FALSE, stable.algo = FALSE), wBY(x, f, wvar, wNA)) expect_equal(fvar(xNA, f, wNA, na.rm = FALSE, stable.algo = FALSE), wBY(xNA, f, wvar, wNA)) expect_equal(na20(fvar(xNA, f, wNA, stable.algo = FALSE)), na20(wBY(xNA, f, wvar, wNA, na.rm = TRUE))) expect_equal(na20(fvar(m, g, wdatNA, stable.algo = FALSE)), na20(wBY(m, gf, wvar, wdatNA, na.rm = TRUE))) expect_equal(fvar(m, g, wdatNA, na.rm = FALSE, stable.algo = FALSE), wBY(m, gf, wvar, wdatNA)) expect_equal(fvar(mNA, g, wdatNA, na.rm = FALSE, stable.algo = FALSE), wBY(mNA, gf, wvar, wdatNA)) expect_equal(na20(fvar(mNA, g, wdatNA, stable.algo = FALSE)), na20(wBY(mNA, gf, wvar, wdatNA, na.rm = TRUE))) expect_equal(na20(fvar(mtcars, g, wdatNA, stable.algo = FALSE)), na20(wBY(mtcars, gf, wvar, wdatNA, na.rm = TRUE))) expect_equal(fvar(mtcars, g, wdatNA, na.rm = FALSE, stable.algo = FALSE), wBY(mtcars, gf, wvar, wdatNA)) expect_equal(fvar(mtcNA, g, wdatNA, na.rm = FALSE, stable.algo = FALSE), wBY(mtcNA, gf, wvar, wdatNA)) expect_equal(na20(fvar(mtcNA, g, wdatNA, stable.algo = FALSE)), na20(wBY(mtcNA, gf, wvar, wdatNA, na.rm = TRUE))) }) test_that("fvar with direct algorithm performs numerically stable", { expect_true(all_obj_equal(replicate(50, fvar(1, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(NA, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(NA, na.rm = FALSE, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(x, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(x, na.rm = FALSE, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(xNA, na.rm = FALSE, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(xNA, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(m, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(m, na.rm = FALSE, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mNA, na.rm = FALSE, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mNA, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mtcars, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mtcars, na.rm = FALSE, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mtcNA, na.rm = FALSE, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mtcNA, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(x, f, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(x, f, na.rm = FALSE, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(xNA, f, na.rm = FALSE, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(xNA, f, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(m, g, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(m, g, na.rm = FALSE, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mNA, g, na.rm = FALSE, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mNA, g, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mtcars, g, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mtcars, g, na.rm = FALSE, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mtcNA, g, na.rm = FALSE, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mtcNA, g, stable.algo = FALSE), simplify = FALSE))) }) test_that("fvar with with direct algorithm and complete weights performs numerically stable", { expect_true(all_obj_equal(replicate(50, fvar(1, w = 1, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(NA, w = 1, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(NA, w = 1, na.rm = FALSE, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(x, w = w, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(x, w = w, na.rm = FALSE, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(xNA, w = w, na.rm = FALSE, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(xNA, w = w, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(m, w = wdat, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(m, w = wdat, na.rm = FALSE, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mNA, w = wdat, na.rm = FALSE, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mNA, w = wdat, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mtcars, w = wdat, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mtcars, w = wdat, na.rm = FALSE, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mtcNA, w = wdat, na.rm = FALSE, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mtcNA, w = wdat, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(x, f, w, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(x, f, w, na.rm = FALSE, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(xNA, f, w, na.rm = FALSE, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(xNA, f, w, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(m, g, wdat, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(m, g, wdat, na.rm = FALSE, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mNA, g, wdat, na.rm = FALSE, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mNA, g, wdat, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mtcars, g, wdat, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mtcars, g, wdat, na.rm = FALSE, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mtcNA, g, wdat, na.rm = FALSE, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mtcNA, g, wdat, stable.algo = FALSE), simplify = FALSE))) }) test_that("fvar with with direct algorithm and missing weights performs numerically stable", { expect_true(all_obj_equal(replicate(50, fvar(1, w = NA, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(NA, w = NA, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(NA, w = NA, na.rm = FALSE, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(x, w = wNA, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(x, w = wNA, na.rm = FALSE, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(xNA, w = wNA, na.rm = FALSE, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(xNA, w = wNA, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(m, w = wdatNA, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(m, w = wdatNA, na.rm = FALSE, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mNA, w = wdatNA, na.rm = FALSE, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mNA, w = wdatNA, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mtcars, w = wdatNA, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mtcars, w = wdatNA, na.rm = FALSE, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mtcNA, w = wdatNA, na.rm = FALSE, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mtcNA, w = wdatNA, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(x, f, wNA, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(x, f, wNA, na.rm = FALSE, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(xNA, f, wNA, na.rm = FALSE, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(xNA, f, wNA, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(m, g, wdatNA, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(m, g, wdatNA, na.rm = FALSE, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mNA, g, wdatNA, na.rm = FALSE, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mNA, g, wdatNA, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mtcars, g, wdatNA, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mtcars, g, wdatNA, na.rm = FALSE, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mtcNA, g, wdatNA, na.rm = FALSE, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mtcNA, g, wdatNA, stable.algo = FALSE), simplify = FALSE))) }) test_that("fvar with direct algorithm handles special values in the right way", { expect_equal(fvar(NA, stable.algo = FALSE), NA_real_) expect_equal(fvar(NaN, stable.algo = FALSE), NA_real_) expect_equal(fvar(Inf, stable.algo = FALSE), NA_real_) expect_equal(fvar(-Inf, stable.algo = FALSE), NA_real_) expect_equal(fvar(TRUE, stable.algo = FALSE), NA_real_) expect_equal(fvar(FALSE, stable.algo = FALSE), NA_real_) expect_equal(fvar(NA, na.rm = FALSE, stable.algo = FALSE), NA_real_) expect_equal(fvar(NaN, na.rm = FALSE, stable.algo = FALSE), NA_real_) expect_equal(fvar(Inf, na.rm = FALSE, stable.algo = FALSE), NA_real_) expect_equal(fvar(-Inf, na.rm = FALSE, stable.algo = FALSE), NA_real_) expect_equal(fvar(TRUE, na.rm = FALSE, stable.algo = FALSE), NA_real_) expect_equal(fvar(FALSE, na.rm = FALSE, stable.algo = FALSE), NA_real_) expect_equal(fvar(c(1,NA), stable.algo = FALSE), NA_real_) expect_equal(fvar(c(1,NaN), stable.algo = FALSE), NA_real_) expect_equal(fvar(c(1,Inf), stable.algo = FALSE), NA_real_) expect_equal(fvar(c(1,-Inf), stable.algo = FALSE), NA_real_) expect_equal(fvar(c(FALSE,TRUE), stable.algo = FALSE), 0.5) expect_equal(fvar(c(FALSE,FALSE), stable.algo = FALSE), 0) expect_equal(fvar(c(1,Inf), na.rm = FALSE, stable.algo = FALSE), NA_real_) expect_equal(fvar(c(1,-Inf), na.rm = FALSE, stable.algo = FALSE), NA_real_) expect_equal(fvar(c(FALSE,TRUE), na.rm = FALSE, stable.algo = FALSE), 0.5) expect_equal(fvar(c(FALSE,FALSE), na.rm = FALSE, stable.algo = FALSE), 0) }) test_that("fvar with with direct algorithm and weights handles special values in the right way", { expect_equal(fvar(NA, w = 1, stable.algo = FALSE), NA_real_) expect_equal(fvar(NaN, w = 1, stable.algo = FALSE), NA_real_) expect_equal(fvar(Inf, w = 1, stable.algo = FALSE), NA_real_) expect_equal(fvar(-Inf, w = 1, stable.algo = FALSE), NA_real_) expect_equal(fvar(TRUE, w = 1, stable.algo = FALSE), NA_real_) expect_equal(fvar(FALSE, w = 1, stable.algo = FALSE), NA_real_) expect_equal(fvar(NA, w = 1, na.rm = FALSE, stable.algo = FALSE), NA_real_) expect_equal(fvar(NaN, w = 1, na.rm = FALSE, stable.algo = FALSE), NA_real_) expect_equal(fvar(Inf, w = 1, na.rm = FALSE, stable.algo = FALSE), NA_real_) expect_equal(fvar(-Inf, w = 1, na.rm = FALSE, stable.algo = FALSE), NA_real_) expect_equal(fvar(TRUE, w = 1, na.rm = FALSE, stable.algo = FALSE), NA_real_) expect_equal(fvar(FALSE, w = 1, na.rm = FALSE, stable.algo = FALSE), NA_real_) expect_equal(fvar(NA, w = NA, stable.algo = FALSE), NA_real_) expect_equal(fvar(NaN, w = NA, stable.algo = FALSE), NA_real_) expect_equal(fvar(Inf, w = NA, stable.algo = FALSE), NA_real_) expect_equal(fvar(-Inf, w = NA, stable.algo = FALSE), NA_real_) expect_equal(fvar(TRUE, w = NA, stable.algo = FALSE), NA_real_) expect_equal(fvar(FALSE, w = NA, stable.algo = FALSE), NA_real_) expect_equal(fvar(NA, w = NA, na.rm = FALSE, stable.algo = FALSE), NA_real_) expect_equal(fvar(NaN, w = NA, na.rm = FALSE, stable.algo = FALSE), NA_real_) expect_equal(fvar(Inf, w = NA, na.rm = FALSE, stable.algo = FALSE), NA_real_) expect_equal(fvar(-Inf, w = NA, na.rm = FALSE, stable.algo = FALSE), NA_real_) expect_equal(fvar(TRUE, w = NA, na.rm = FALSE, stable.algo = FALSE), NA_real_) expect_equal(fvar(FALSE, w = NA, na.rm = FALSE, stable.algo = FALSE), NA_real_) expect_equal(fvar(1:3, w = c(1,Inf,3), stable.algo = FALSE), NA_real_) expect_equal(fvar(1:3, w = c(1,-Inf,3), stable.algo = FALSE), NA_real_) expect_equal(fvar(1:3, w = c(1,Inf,3), na.rm = FALSE, stable.algo = FALSE), NA_real_) expect_equal(fvar(1:3, w = c(1,-Inf,3), na.rm = FALSE, stable.algo = FALSE), NA_real_) }) test_that("fvar with direct algorithm produces errors for wrong input", { expect_error(fvar("a", stable.algo = FALSE)) expect_error(fvar(NA_character_, stable.algo = FALSE)) expect_error(fvar(mNAc, stable.algo = FALSE)) expect_error(fvar(mNAc, f, stable.algo = FALSE)) expect_error(fvar(1:2,1:3, stable.algo = FALSE)) expect_error(fvar(m,1:31, stable.algo = FALSE)) expect_error(fvar(mtcars,1:31, stable.algo = FALSE)) expect_error(fvar(mtcars, w = 1:31, stable.algo = FALSE)) expect_error(fvar("a", w = 1, stable.algo = FALSE)) expect_error(fvar(1:2, w = 1:3, stable.algo = FALSE)) expect_error(fvar(NA_character_, w = 1, stable.algo = FALSE)) expect_error(fvar(mNAc, w = wdat, stable.algo = FALSE)) expect_error(fvar(mNAc, f, wdat, stable.algo = FALSE)) expect_error(fvar(mNA, w = 1:33, stable.algo = FALSE)) expect_error(fvar(1:2,1:2, 1:3, stable.algo = FALSE)) expect_error(fvar(m,1:32,1:20, stable.algo = FALSE)) expect_error(fvar(mtcars,1:32,1:10, stable.algo = FALSE)) expect_error(fvar(1:2, w = c("a","b"), stable.algo = FALSE)) expect_error(fvar(wlddev, stable.algo = FALSE)) expect_error(fvar(wlddev, w = wlddev$year, stable.algo = FALSE)) expect_error(fvar(wlddev, wlddev$iso3c, stable.algo = FALSE)) expect_error(fvar(wlddev, wlddev$iso3c, wlddev$year, stable.algo = FALSE)) }) # fsd (not necessary to test in the same way because it's just sqrt(fvar())) test_that("fsd performs like base::sd", { expect_equal(fsd(NA), bsd(NA)) expect_equal(fsd(NA, na.rm = FALSE), bsd(NA)) expect_equal(fsd(1), bsd(1, na.rm = TRUE)) expect_equal(fsd(1:3), bsd(1:3, na.rm = TRUE)) expect_equal(fsd(-1:1), bsd(-1:1, na.rm = TRUE)) expect_equal(fsd(1, na.rm = FALSE), bsd(1)) expect_equal(fsd(1:3, na.rm = FALSE), bsd(1:3)) expect_equal(fsd(-1:1, na.rm = FALSE), bsd(-1:1)) expect_equal(fsd(x), bsd(x, na.rm = TRUE)) expect_equal(fsd(x, na.rm = FALSE), bsd(x)) expect_equal(fsd(xNA, na.rm = FALSE), bsd(xNA)) expect_equal(fsd(xNA), bsd(xNA, na.rm = TRUE)) expect_equal(fsd(mtcars), fsd(m)) expect_equal(fsd(m), dapply(m, bsd, na.rm = TRUE)) expect_equal(fsd(m, na.rm = FALSE), dapply(m, bsd)) expect_equal(fsd(mNA, na.rm = FALSE), dapply(mNA, bsd)) expect_equal(fsd(mNA), dapply(mNA, bsd, na.rm = TRUE)) expect_equal(fsd(mtcars), dapply(mtcars, bsd, na.rm = TRUE)) expect_equal(fsd(mtcars, na.rm = FALSE), dapply(mtcars, bsd)) expect_equal(fsd(mtcNA, na.rm = FALSE), dapply(mtcNA, bsd)) expect_equal(fsd(mtcNA), dapply(mtcNA, bsd, na.rm = TRUE)) expect_equal(fsd(x, f), BY(x, f, bsd, na.rm = TRUE)) expect_equal(fsd(x, f, na.rm = FALSE), BY(x, f, bsd)) expect_equal(fsd(xNA, f, na.rm = FALSE), BY(xNA, f, bsd)) expect_equal(fsd(xNA, f), BY(xNA, f, bsd, na.rm = TRUE)) expect_equal(fsd(m, g), BY(m, g, bsd, na.rm = TRUE)) expect_equal(fsd(m, g, na.rm = FALSE), BY(m, g, bsd)) expect_equal(fsd(mNA, g, na.rm = FALSE), BY(mNA, g, bsd)) expect_equal(fsd(mNA, g), BY(mNA, g, bsd, na.rm = TRUE)) expect_equal(fsd(mtcars, g), BY(mtcars, g, bsd, na.rm = TRUE)) expect_equal(fsd(mtcars, g, na.rm = FALSE), BY(mtcars, g, bsd)) expect_equal(fsd(mtcNA, g, na.rm = FALSE), BY(mtcNA, g, bsd)) expect_equal(fsd(mtcNA, g), BY(mtcNA, g, bsd, na.rm = TRUE)) }) test_that("fsd performs numerically stable", { expect_true(all_obj_equal(replicate(50, fsd(1), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsd(NA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsd(NA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsd(x), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsd(x, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsd(xNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsd(xNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsd(m), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsd(m, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsd(mNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsd(mNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsd(mtcars), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsd(mtcars, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsd(mtcNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsd(mtcNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsd(x, f), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsd(x, f, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsd(xNA, f, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsd(xNA, f), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsd(m, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsd(m, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsd(mNA, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsd(mNA, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsd(mtcars, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsd(mtcars, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsd(mtcNA, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsd(mtcNA, g), simplify = FALSE))) }) test_that("fsd handles special values in the right way", { expect_equal(fsd(NA), NA_real_) expect_equal(fsd(NaN), NA_real_) expect_equal(fsd(Inf), NA_real_) expect_equal(fsd(-Inf), NA_real_) expect_equal(fsd(TRUE), NA_real_) expect_equal(fsd(FALSE), NA_real_) expect_equal(fsd(NA, na.rm = FALSE), NA_real_) expect_equal(fsd(NaN, na.rm = FALSE), NA_real_) expect_equal(fsd(Inf, na.rm = FALSE), NA_real_) expect_equal(fsd(-Inf, na.rm = FALSE), NA_real_) expect_equal(fsd(TRUE, na.rm = FALSE), NA_real_) expect_equal(fsd(FALSE, na.rm = FALSE), NA_real_) }) test_that("fsd produces errors for wrong input", { expect_error(fsd("a")) expect_error(fsd(NA_character_)) expect_error(fsd(mNAc)) expect_error(fsd(mNAc, f)) expect_error(fsd(1:2,1:3)) expect_error(fsd(m,1:31)) expect_error(fsd(mtcars,1:31)) expect_error(fsd(mtcars, w = 1:31)) expect_error(fsd("a", w = 1)) expect_error(fsd(1:2, w = 1:3)) expect_error(fsd(NA_character_, w = 1)) expect_error(fsd(mNAc, w = wdat)) expect_error(fsd(mNAc, f, wdat)) expect_error(fsd(mNA, w = 1:33)) expect_error(fsd(1:2,1:2, 1:3)) expect_error(fsd(m,1:32,1:20)) expect_error(fsd(mtcars,1:32,1:10)) expect_error(fsd(1:2, w = c("a","b"))) expect_error(fsd(wlddev)) expect_error(fsd(wlddev, w = wlddev$year)) expect_error(fsd(wlddev, wlddev$iso3c)) expect_error(fsd(wlddev, wlddev$iso3c, wlddev$year)) }) collapse/tests/testthat/test-BY.R0000644000176200001440000003226114167351021016474 0ustar liggesuserscontext("BY") bmean <- base::mean bsd <- stats::sd bsum <- base::sum bmin <- base::min bmax <- base::max bscale <- base::scale # rm(list = ls()) set.seed(101) x <- rnorm(100) xNA <- x xNA[sample.int(100,20)] <- NA f <- as.factor(sort(sample.int(10, 100, TRUE))) g <- GRP(mtcars, ~ cyl + vs + am) f2 <- as.factor(sort(sample.int(6, 32, TRUE))) m <- as.matrix(mtcars) mNA <- na_insert(m) mtcNA <- na_insert(mtcars) na20 <- function(x) { x[is.na(x)] <- 0 x } myscale <- function(x, na.rm = FALSE) (x - mean.default(x, na.rm = na.rm)) / bsd(x, na.rm = na.rm) mysumf <- function(x, na.rm = FALSE) c(N = bsum(!is.na(x)), Mean = bmean(x, na.rm = na.rm), SD = bsd(x, na.rm = na.rm), Min = bmin(x, na.rm = na.rm), Max = bmax(x, na.rm = na.rm)) options(warn = -1) test_that("BY.default works as intended", { # No missing values expect_equal(BY(x, f, bsum), fsum(x, f)) expect_equal(BY(x, f, bsum, return = "list"), as.list(fsum(x, f))) expect_equal(BY(x, f, bmean), fmean(x, f)) expect_equal(BY(x, f, bmean, return = "list"), as.list(fmean(x, f))) # BY(x, f, bscale) expect_equal(BY(x, f, bscale, use.g.names = FALSE), fscale(x, f)) expect_equal(BY(x, f, log, use.g.names = FALSE), log(x)) expect_equal(BY(x, f, quantile), unlist(lapply(split(x, f), quantile))) expect_equal(BY(x, f, quantile, expand.wide = TRUE), t(sapply(split(x, f), quantile))) expect_equal(BY(x, f, quantile, return = "list"), lapply(split(x, f), quantile)) expect_equal(BY(x, f, quantile, return = "list", expand.wide = TRUE), lapply(split(x, f), quantile)) # This should have no effect !! # Missing values removed expect_equal(BY(xNA, f, bsum, na.rm = TRUE), na20(fsum(xNA, f))) expect_equal(BY(xNA, f, bsum, return = "list", na.rm = TRUE), as.list(na20(fsum(xNA, f)))) expect_equal(BY(xNA, f, bmean, na.rm = TRUE), fmean(xNA, f)) expect_equal(BY(xNA, f, bmean, return = "list", na.rm = TRUE), as.list(fmean(xNA, f))) expect_equal(BY(xNA, f, bscale, use.g.names = FALSE), fscale(xNA, f)) expect_equal(BY(xNA, f, quantile, na.rm = TRUE), unlist(lapply(split(xNA, f), quantile, na.rm = TRUE))) expect_equal(BY(xNA, f, quantile, expand.wide = TRUE, na.rm = TRUE), t(sapply(split(xNA, f), quantile, na.rm = TRUE))) expect_equal(BY(xNA, f, quantile, return = "list", na.rm = TRUE), lapply(split(xNA, f), quantile, na.rm = TRUE)) expect_equal(BY(xNA, f, quantile, return = "list", expand.wide = TRUE, na.rm = TRUE), lapply(split(xNA, f), quantile, na.rm = TRUE)) # This should have no effect !! # Missing values kept expect_equal(BY(xNA, f, bsum), fsum(xNA, f, na.rm = FALSE)) expect_equal(BY(xNA, f, bsum, return = "list"), as.list(fsum(xNA, f, na.rm = FALSE))) expect_equal(BY(xNA, f, bmean), fmean(xNA, f, na.rm = FALSE)) expect_equal(BY(xNA, f, bmean, return = "list"), as.list(fmean(xNA, f, na.rm = FALSE))) expect_equal(BY(xNA, f, myscale, use.g.names = FALSE), fscale(xNA, f, na.rm = FALSE)) expect_equal(BY(xNA, f, mysumf), unlist(lapply(split(xNA, f), mysumf))) expect_equal(BY(xNA, f, mysumf, expand.wide = TRUE), t(sapply(split(xNA, f), mysumf))) expect_equal(BY(xNA, f, mysumf, return = "list"), lapply(split(xNA, f), mysumf)) expect_equal(BY(xNA, f, mysumf, return = "list", expand.wide = TRUE), lapply(split(xNA, f), mysumf)) # This should have no effect !! }) test_that("BY.matrix works as intended", { # No missing values expect_equal(BY(m, g, bsum), fsum(m, g)) expect_equal(BY(m, g, bsum, return = "data.frame"), qDF(fsum(m, g))) expect_equal(BY(m, g, bmean), fmean(m, g)) expect_equal(BY(m, g, bmean, return = "data.frame"), qDF(fmean(m, g))) # BY(m, g, bscale) expect_equal(BY(m, f2, bscale, use.g.names = FALSE), setRownames(fscale(m, f2), NULL)) expect_equal(BY(m, f2, log, use.g.names = FALSE), setRownames(log(m), NULL)) expect_equal(BY(m, f2, quantile), qM(lapply(mctl(m, names = TRUE), function(x) unlist(lapply(split(x, f2), quantile))))) expect_equal(setDimnames(BY(m, f2, quantile, expand.wide = TRUE), NULL), setDimnames(do.call(cbind, lapply(mctl(m, names = TRUE), function(x) do.call(rbind, lapply(split(x, f2), quantile)))), NULL)) expect_equal(BY(m, f2, quantile, return = "data.frame"), qDF(qM(lapply(mctl(m, names = TRUE), function(x) unlist(lapply(split(x, f2), quantile)))))) expect_equal(unname(BY(m, f2, quantile, return = "data.frame", expand.wide = TRUE)), unname(qDF(do.call(cbind, lapply(mctl(m, names = TRUE), function(x) do.call(rbind, lapply(split(x, f2), quantile))))))) # Missing values removed expect_equal(BY(mNA, g, bsum, na.rm = TRUE), na20(fsum(mNA, g))) expect_equal(BY(mNA, g, bsum, return = "data.frame", na.rm = TRUE), qDF(na20(fsum(mNA, g)))) expect_equal(BY(mNA, g, bmean, na.rm = TRUE), fmean(mNA, g)) expect_equal(BY(mNA, g, bmean, return = "data.frame", na.rm = TRUE), qDF(fmean(mNA, g))) expect_equal(BY(mNA, f2, bscale, use.g.names = FALSE), setRownames(fscale(mNA, f2), NULL)) expect_equal(BY(mNA, f2, log, use.g.names = FALSE), setRownames(log(mNA), NULL)) expect_equal(BY(mNA, f2, quantile, na.rm = TRUE), qM(lapply(mctl(mNA, names = TRUE), function(x) unlist(lapply(split(x, f2), quantile, na.rm = TRUE))))) expect_equal(setDimnames(BY(mNA, f2, quantile, expand.wide = TRUE, na.rm = TRUE), NULL), setDimnames(do.call(cbind, lapply(mctl(mNA, names = TRUE), function(x) do.call(rbind, lapply(split(x, f2), quantile, na.rm = TRUE)))), NULL)) expect_equal(BY(mNA, f2, quantile, return = "data.frame", na.rm = TRUE), qDF(qM(lapply(mctl(mNA, names = TRUE), function(x) unlist(lapply(split(x, f2), quantile, na.rm = TRUE)))))) expect_equal(unname(BY(mNA, f2, quantile, return = "data.frame", expand.wide = TRUE, na.rm = TRUE)), unname(qDF(do.call(cbind, lapply(mctl(mNA, names = TRUE), function(x) do.call(rbind, lapply(split(x, f2), quantile, na.rm = TRUE))))))) # Missing values kept expect_equal(BY(mNA, g, bsum), fsum(mNA, g, na.rm = FALSE)) expect_equal(BY(mNA, g, bsum, return = "data.frame"), qDF(fsum(mNA, g, na.rm = FALSE))) expect_equal(BY(mNA, g, bmean), fmean(mNA, g, na.rm = FALSE)) expect_equal(BY(mNA, g, bmean, return = "data.frame"), qDF(fmean(mNA, g, na.rm = FALSE))) expect_equal(BY(mNA, f2, mysumf), qM(lapply(mctl(mNA, names = TRUE), function(x) unlist(lapply(split(x, f2), mysumf))))) expect_equal(setDimnames(BY(mNA, f2, mysumf, expand.wide = TRUE), NULL), setDimnames(do.call(cbind, lapply(mctl(mNA, names = TRUE), function(x) do.call(rbind, lapply(split(x, f2), mysumf)))), NULL)) expect_equal(BY(mNA, f2, mysumf, return = "data.frame"), qDF(qM(lapply(mctl(mNA, names = TRUE), function(x) unlist(lapply(split(x, f2), mysumf)))))) expect_equal(unname(BY(mNA, f2, mysumf, return = "data.frame", expand.wide = TRUE)), unname(qDF(do.call(cbind, lapply(mctl(mNA, names = TRUE), function(x) do.call(rbind, lapply(split(x, f2), mysumf))))))) }) test_that("BY.data.frame works as intended", { # No missing values expect_equal(BY(mtcars, g, bsum), fsum(mtcars, g)) expect_equal(BY(mtcars, g, bsum, return = "matrix"), qM(fsum(mtcars, g))) expect_equal(BY(mtcars, g, bmean), fmean(mtcars, g)) expect_equal(BY(mtcars, g, bmean, return = "matrix"), qM(fmean(mtcars, g))) # BY(mtcars, g, bscale) expect_equal(BY(mtcars, f2, bscale, use.g.names = FALSE), setRownames(fscale(mtcars, f2))) expect_equal(BY(mtcars, f2, log, use.g.names = FALSE), setRownames(log(mtcars))) expect_equal(BY(mtcars, f2, quantile), qDF(qM(lapply(mtcars, function(x) unlist(lapply(split(x, f2), quantile)))))) expect_equal(unname(BY(mtcars, f2, quantile, expand.wide = TRUE)), unname(qDF(do.call(cbind, lapply(mtcars, function(x) do.call(rbind, lapply(split(x, f2), quantile))))))) expect_equal(BY(mtcars, f2, quantile, return = "matrix"), qM(lapply(mtcars, function(x) unlist(lapply(split(x, f2), quantile))))) expect_equal(setDimnames(BY(mtcars, f2, quantile, return = "matrix", expand.wide = TRUE), NULL), setDimnames(do.call(cbind, lapply(mtcars, function(x) do.call(rbind, lapply(split(x, f2), quantile)))), NULL)) # Missing values removed expect_equal(BY(mtcNA, g, bsum, na.rm = TRUE), na20(fsum(mtcNA, g))) expect_equal(BY(mtcNA, g, bsum, return = "matrix", na.rm = TRUE), na20(qM(fsum(mtcNA, g)))) expect_equal(BY(mtcNA, g, bmean, na.rm = TRUE), fmean(mtcNA, g)) expect_equal(BY(mtcNA, g, bmean, return = "matrix", na.rm = TRUE), qM(fmean(mtcNA, g))) expect_equal(BY(mtcNA, f2, bscale, use.g.names = FALSE), setRownames(fscale(mtcNA, f2))) expect_equal(BY(mtcNA, f2, log, use.g.names = FALSE), setRownames(log(mtcNA))) expect_equal(BY(mtcNA, f2, quantile, na.rm = TRUE), qDF(qM(lapply(mtcNA, function(x) unlist(lapply(split(x, f2), quantile, na.rm = TRUE)))))) expect_equal(unname(BY(mtcNA, f2, quantile, expand.wide = TRUE, na.rm = TRUE)), unname(qDF(do.call(cbind, lapply(mtcNA, function(x) do.call(rbind, lapply(split(x, f2), quantile, na.rm = TRUE))))))) expect_equal(BY(mtcNA, f2, quantile, return = "matrix", na.rm = TRUE), qM(lapply(mtcNA, function(x) unlist(lapply(split(x, f2), quantile, na.rm = TRUE))))) expect_equal(setDimnames(BY(mtcNA, f2, quantile, return = "matrix", expand.wide = TRUE, na.rm = TRUE), NULL), setDimnames(do.call(cbind, lapply(mtcNA, function(x) do.call(rbind, lapply(split(x, f2), quantile, na.rm = TRUE)))), NULL)) # Missing values kept expect_equal(BY(mtcNA, g, bsum), fsum(mtcNA, g, na.rm = FALSE)) expect_equal(BY(mtcNA, g, bsum, return = "matrix"), qM(fsum(mtcNA, g, na.rm = FALSE))) expect_equal(BY(mtcNA, g, bmean), fmean(mtcNA, g, na.rm = FALSE)) expect_equal(BY(mtcNA, g, bmean, return = "matrix"), qM(fmean(mtcNA, g, na.rm = FALSE))) expect_equal(BY(mtcNA, f2, mysumf), qDF(qM(lapply(mtcNA, function(x) unlist(lapply(split(x, f2), mysumf)))))) expect_equal(unname(BY(mtcNA, f2, mysumf, expand.wide = TRUE)), unname(qDF(do.call(cbind, lapply(mtcNA, function(x) do.call(rbind, lapply(split(x, f2), mysumf))))))) expect_equal(BY(mtcNA, f2, mysumf, return = "matrix"), qM(lapply(mtcNA, function(x) unlist(lapply(split(x, f2), mysumf))))) expect_equal(setDimnames(BY(mtcNA, f2, mysumf, return = "matrix", expand.wide = TRUE), NULL), setDimnames(do.call(cbind, lapply(mtcNA, function(x) do.call(rbind, lapply(split(x, f2), mysumf)))), NULL)) }) test_that("Output type is as expected", { expect_true(is.atomic(BY(x, f, bsum))) expect_true(is.atomic(BY(xNA, f, bsum, na.rm = TRUE))) expect_true(is.matrix(BY(mtcars, g, bsum, return = "matrix"))) expect_true(is.data.frame(BY(m, g, bsum, return = "data.frame"))) # BY(mtcars, g, quantile, expand.wide = TRUE, return = "list") expect_equal(BY(mtcars, g, quantile, return = "list", expand.wide = TRUE), BY(m, g, quantile, return = "list", expand.wide = TRUE)) }) test_that("BY matrix <> data.frame conversions run seamlessly", { expect_equal(BY(mtcars, g, bsum, return = "matrix"), BY(m, g, bsum)) expect_equal(BY(mtcars, g, bsum, return = "matrix", use.g.names = FALSE), BY(m, g, bsum, use.g.names = FALSE)) expect_equal(BY(m, g, bsum, return = "data.frame"), BY(mtcars, g, bsum)) expect_equal(BY(m, g, bsum, return = "data.frame", use.g.names = FALSE), BY(mtcars, g, bsum, use.g.names = FALSE)) expect_equal(BY(mtcars, g, log, return = "matrix"), BY(m, g, log)) expect_equal(BY(mtcars, g, log, return = "matrix", use.g.names = FALSE), BY(m, g, log, use.g.names = FALSE)) expect_equal(BY(m, g, log, return = "data.frame"), BY(mtcars, g, log)) expect_equal(BY(m, g, log, return = "data.frame", use.g.names = FALSE), BY(mtcars, g, log, use.g.names = FALSE)) expect_equal(BY(mtcars, g, quantile, return = "matrix"), BY(m, g, quantile)) expect_equal(BY(mtcars, g, quantile, return = "matrix", use.g.names = FALSE), BY(m, g, quantile, use.g.names = FALSE)) expect_equal(BY(m, g, quantile, return = "data.frame"), BY(mtcars, g, quantile)) expect_equal(BY(m, g, quantile, return = "data.frame", use.g.names = FALSE), BY(mtcars, g, quantile, use.g.names = FALSE)) }) test_that("BY produces errors for wrong input", { expect_error(BY(~bla, g, bsum)) # Not supported type expect_error(BY(1, g, bsum)) # This only gives a warning in gsplit: g is too long expect_error(BY(x, g, bsum)) # This only gives a warning in gsplit: g is too short expect_error(BY(letters, sample.int(5, length(letters), TRUE), bsum)) # wrong type expect_error(BY(x, f, sum2)) # unknown object expect_error(BY(x, f, "sum2")) # unknown object expect_error(BY(x, f, log, bla = 1)) # unknown function argument expect_error(BY(x, f, bsum, return = "bla")) # unknown return option expect_error(BY(m, g, sum2)) # unknown object expect_error(BY(m, g, "sum2")) # unknown object expect_error(BY(m, g, log, bla = 1)) # unknown function argument expect_error(BY(m, g, bsum, return = "bla")) # unknown return option expect_error(BY(mtcars, g, sum2)) # unknown object expect_error(BY(mtcars, g, "sum2")) # unknown object expect_error(BY(mtcars, g, log, bla = 1)) # unknown function argument expect_error(BY(mtcars, g, bsum, return = "bla")) # unknown return option expect_error(BY(mtcars, ~g, bsum)) # Not supported type expect_error(BY(m, ~g, bsum)) # Not supported type expect_error(BY(x, ~g, bsum)) # Not supported type }) options(warn = 1) collapse/tests/testthat/test-fsum.R0000644000176200001440000007755014167360671017161 0ustar liggesuserscontext("fsum") bsum <- base::sum # TODO: # identical(as.integer(fsum(td, g)), unname(fsum(t, g))) # str(fsum(m)) # Do integer checks using identical, not all.equal.. # rm(list = ls()) set.seed(101) x <- rnorm(100) * 1000 w <- abs(100*rnorm(100)) wdat <- abs(100*rnorm(32)) xNA <- x wNA <- w wdatNA <- wdat xNA[sample.int(100,20)] <- NA wNA[sample.int(100,20)] <- NA wdatNA[sample.int(32, 5)] <- NA f <- as.factor(sample.int(10, 100, TRUE)) g <- GRP(mtcars, ~ cyl + vs + am) gf <- as_factor_GRP(g) mtcNA <- na_insert(mtcars) mtcNA[27,1] <- NA # single group NA !! m <- as.matrix(mtcars) mNA <- as.matrix(mtcNA) mNAc <- mNA storage.mode(mNAc) <- "character" na20 <- function(x) { x[is.na(x)] <- 0L x } wsum <- function(x, w, na.rm = FALSE) { if(na.rm) { cc <- complete.cases(x, w) if(!any(cc)) return(NA_real_) x <- x[cc] w <- w[cc] } bsum(x*w) } wBY <- function(x, f, FUN, w, ...) { if(is.atomic(x) && !is.array(x)) return(mapply(FUN, split(x, f), split(w, f), ...)) wspl <- split(w, f) if(is.atomic(x)) return(dapply(x, function(xi) mapply(FUN, split(xi, f), wspl, ...))) qDF(dapply(x, function(xi) mapply(FUN, split(xi, f), wspl, ...), return = "matrix")) } test_that("fsum performs like base::sum and base::colSums", { expect_equal(fsum(NA), bsum(NA)) expect_equal(fsum(NA, na.rm = FALSE), bsum(NA)) expect_equal(fsum(1), bsum(1, na.rm = TRUE)) expect_identical(fsum(1:3), bsum(1:3, na.rm = TRUE)) expect_identical(fsum(-1:1), bsum(-1:1, na.rm = TRUE)) expect_equal(fsum(1, na.rm = FALSE), bsum(1)) expect_identical(fsum(1:3, na.rm = FALSE), bsum(1:3)) expect_identical(fsum(-1:1, na.rm = FALSE), bsum(-1:1)) expect_equal(fsum(x), bsum(x, na.rm = TRUE)) expect_equal(fsum(x, na.rm = FALSE), bsum(x)) expect_equal(fsum(xNA, na.rm = FALSE), bsum(xNA)) expect_equal(fsum(xNA), bsum(xNA, na.rm = TRUE)) expect_equal(fsum(mtcars), fsum(m)) expect_equal(fsum(m), colSums(m, na.rm = TRUE)) expect_equal(fsum(m, na.rm = FALSE), colSums(m)) expect_equal(fsum(mNA, na.rm = FALSE), colSums(mNA)) expect_equal(fsum(mNA), colSums(mNA, na.rm = TRUE)) expect_equal(fsum(mtcars), dapply(mtcars, bsum, na.rm = TRUE)) expect_equal(fsum(mtcars, na.rm = FALSE), dapply(mtcars, bsum)) expect_equal(fsum(mtcNA, na.rm = FALSE), dapply(mtcNA, bsum)) expect_equal(fsum(mtcNA), dapply(mtcNA, bsum, na.rm = TRUE)) expect_equal(fsum(x, f), BY(x, f, bsum, na.rm = TRUE)) expect_equal(fsum(x, f, na.rm = FALSE), BY(x, f, bsum)) expect_equal(fsum(xNA, f, na.rm = FALSE), BY(xNA, f, bsum)) expect_equal(na20(fsum(xNA, f)), BY(xNA, f, bsum, na.rm = TRUE)) expect_equal(fsum(m, g), BY(m, g, bsum, na.rm = TRUE)) expect_equal(fsum(m, g, na.rm = FALSE), BY(m, g, bsum)) expect_equal(fsum(mNA, g, na.rm = FALSE), BY(mNA, g, bsum)) expect_equal(na20(fsum(mNA, g)), BY(mNA, g, bsum, na.rm = TRUE)) # error, bsum(NA) give 0 expect_equal(fsum(mtcars, g), BY(mtcars, g, bsum, na.rm = TRUE)) expect_equal(fsum(mtcars, g, na.rm = FALSE), BY(mtcars, g, bsum)) expect_equal(fsum(mtcNA, g, na.rm = FALSE), BY(mtcNA, g, bsum)) expect_equal(na20(fsum(mtcNA, g)), BY(mtcNA, g, bsum, na.rm = TRUE)) # error, bsum(NA) give 0 }) test_that("fsum with weights performs like wsum (defined above)", { # complete weights expect_equal(fsum(NA, w = 1), wsum(NA, 1)) expect_equal(fsum(NA, w = 1, na.rm = FALSE), wsum(NA, 1)) expect_equal(fsum(1, w = 1), wsum(1, w = 1)) expect_equal(fsum(1:3, w = 1:3), wsum(1:3, 1:3)) expect_equal(fsum(-1:1, w = 1:3), wsum(-1:1, 1:3)) expect_equal(fsum(1, w = 1, na.rm = FALSE), wsum(1, 1)) expect_equal(fsum(1:3, w = c(0.99,3454,1.111), na.rm = FALSE), wsum(1:3, c(0.99,3454,1.111))) expect_equal(fsum(-1:1, w = 1:3, na.rm = FALSE), wsum(-1:1, 1:3)) expect_equal(fsum(x, w = w), wsum(x, w)) expect_equal(fsum(x, w = w, na.rm = FALSE), wsum(x, w)) expect_equal(fsum(xNA, w = w, na.rm = FALSE), wsum(xNA, w)) expect_equal(fsum(xNA, w = w), wsum(xNA, w, na.rm = TRUE)) expect_equal(fsum(mtcars, w = wdat), fsum(m, w = wdat)) expect_equal(fsum(m, w = wdat), dapply(m, wsum, wdat, na.rm = TRUE)) expect_equal(fsum(m, w = wdat, na.rm = FALSE), dapply(m, wsum, wdat)) expect_equal(fsum(mNA, w = wdat, na.rm = FALSE), dapply(mNA, wsum, wdat)) expect_equal(fsum(mNA, w = wdat), dapply(mNA, wsum, wdat, na.rm = TRUE)) expect_equal(fsum(mtcars, w = wdat), dapply(mtcars, wsum, wdat, na.rm = TRUE)) expect_equal(fsum(mtcars, w = wdat, na.rm = FALSE), dapply(mtcars, wsum, wdat)) expect_equal(fsum(mtcNA, w = wdat, na.rm = FALSE), dapply(mtcNA, wsum, wdat)) expect_equal(fsum(mtcNA, w = wdat), dapply(mtcNA, wsum, wdat, na.rm = TRUE)) expect_equal(fsum(x, f, w), wBY(x, f, wsum, w)) expect_equal(fsum(x, f, w, na.rm = FALSE), wBY(x, f, wsum, w)) expect_equal(fsum(xNA, f, w, na.rm = FALSE), wBY(xNA, f, wsum, w)) expect_equal(fsum(xNA, f, w), wBY(xNA, f, wsum, w, na.rm = TRUE)) expect_equal(fsum(m, g, wdat), wBY(m, gf, wsum, wdat)) expect_equal(fsum(m, g, wdat, na.rm = FALSE), wBY(m, gf, wsum, wdat)) expect_equal(fsum(mNA, g, wdat, na.rm = FALSE), wBY(mNA, gf, wsum, wdat)) expect_equal(fsum(mNA, g, wdat), wBY(mNA, gf, wsum, wdat, na.rm = TRUE)) expect_equal(fsum(mtcars, g, wdat), wBY(mtcars, gf, wsum, wdat)) expect_equal(fsum(mtcars, g, wdat, na.rm = FALSE), wBY(mtcars, gf, wsum, wdat)) expect_equal(fsum(mtcNA, g, wdat, na.rm = FALSE), wBY(mtcNA, gf, wsum, wdat)) expect_equal(fsum(mtcNA, g, wdat), wBY(mtcNA, gf, wsum, wdat, na.rm = TRUE)) # missing weights expect_equal(fsum(NA, w = NA), wsum(NA, NA)) expect_equal(fsum(NA, w = NA, na.rm = FALSE), wsum(NA, NA)) expect_equal(fsum(1, w = NA), wsum(1, w = NA)) expect_equal(fsum(1:3, w = c(NA,1:2)), wsum(1:3, c(NA,1:2), na.rm = TRUE)) expect_equal(fsum(-1:1, w = c(NA,1:2)), wsum(-1:1, c(NA,1:2), na.rm = TRUE)) expect_equal(fsum(1, w = NA, na.rm = FALSE), wsum(1, NA)) expect_equal(fsum(1:3, w = c(NA,1:2), na.rm = FALSE), wsum(1:3, c(NA,1:2))) expect_equal(fsum(-1:1, w = c(NA,1:2), na.rm = FALSE), wsum(-1:1, c(NA,1:2))) expect_equal(fsum(x, w = wNA), wsum(x, wNA, na.rm = TRUE)) expect_equal(fsum(x, w = wNA, na.rm = FALSE), wsum(x, wNA)) expect_equal(fsum(xNA, w = wNA, na.rm = FALSE), wsum(xNA, wNA)) expect_equal(fsum(xNA, w = wNA), wsum(xNA, wNA, na.rm = TRUE)) expect_equal(fsum(mtcars, w = wdatNA), fsum(m, w = wdatNA)) expect_equal(fsum(m, w = wdatNA), dapply(m, wsum, wdatNA, na.rm = TRUE)) expect_equal(fsum(m, w = wdatNA, na.rm = FALSE), dapply(m, wsum, wdatNA)) expect_equal(fsum(mNA, w = wdatNA, na.rm = FALSE), dapply(mNA, wsum, wdatNA)) expect_equal(fsum(mNA, w = wdatNA), dapply(mNA, wsum, wdatNA, na.rm = TRUE)) expect_equal(fsum(mtcars, w = wdatNA), dapply(mtcars, wsum, wdatNA, na.rm = TRUE)) expect_equal(fsum(mtcars, w = wdatNA, na.rm = FALSE), dapply(mtcars, wsum, wdatNA)) expect_equal(fsum(mtcNA, w = wdatNA, na.rm = FALSE), dapply(mtcNA, wsum, wdatNA)) expect_equal(fsum(mtcNA, w = wdatNA), dapply(mtcNA, wsum, wdatNA, na.rm = TRUE)) expect_equal(fsum(x, f, wNA), wBY(x, f, wsum, wNA, na.rm = TRUE)) expect_equal(fsum(x, f, wNA, na.rm = FALSE), wBY(x, f, wsum, wNA)) expect_equal(fsum(xNA, f, wNA, na.rm = FALSE), wBY(xNA, f, wsum, wNA)) expect_equal(fsum(xNA, f, wNA), wBY(xNA, f, wsum, wNA, na.rm = TRUE)) expect_equal(fsum(m, g, wdatNA), wBY(m, gf, wsum, wdatNA, na.rm = TRUE)) expect_equal(fsum(m, g, wdatNA, na.rm = FALSE), wBY(m, gf, wsum, wdatNA)) expect_equal(fsum(mNA, g, wdatNA, na.rm = FALSE), wBY(mNA, gf, wsum, wdatNA)) expect_equal(fsum(mNA, g, wdatNA), wBY(mNA, gf, wsum, wdatNA, na.rm = TRUE)) expect_equal(fsum(mtcars, g, wdatNA), wBY(mtcars, gf, wsum, wdatNA, na.rm = TRUE)) expect_equal(fsum(mtcars, g, wdatNA, na.rm = FALSE), wBY(mtcars, gf, wsum, wdatNA)) expect_equal(fsum(mtcNA, g, wdatNA, na.rm = FALSE), wBY(mtcNA, gf, wsum, wdatNA)) expect_equal(fsum(mtcNA, g, wdatNA), wBY(mtcNA, gf, wsum, wdatNA, na.rm = TRUE)) }) test_that("fsum performs numerically stable", { expect_true(all_obj_equal(replicate(50, fsum(1), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(NA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(NA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(x), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(x, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(xNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(xNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(m), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(m, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mtcars), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mtcars, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mtcNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mtcNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(x, f), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(x, f, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(xNA, f, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(xNA, f), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(m, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(m, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mNA, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mNA, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mtcars, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mtcars, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mtcNA, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mtcNA, g), simplify = FALSE))) }) test_that("fsum with complete weights performs numerically stable", { expect_true(all_obj_equal(replicate(50, fsum(1, w = 1), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(NA, w = 1), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(NA, w = 1, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(x, w = w), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(x, w = w, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(xNA, w = w, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(xNA, w = w), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(m, w = wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(m, w = wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mNA, w = wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mNA, w = wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mtcars, w = wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mtcars, w = wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mtcNA, w = wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mtcNA, w = wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(x, f, w), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(x, f, w, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(xNA, f, w, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(xNA, f, w), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(m, g, wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(m, g, wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mNA, g, wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mNA, g, wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mtcars, g, wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mtcars, g, wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mtcNA, g, wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mtcNA, g, wdat), simplify = FALSE))) }) test_that("fsum with missing weights performs numerically stable", { expect_true(all_obj_equal(replicate(50, fsum(1, w = NA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(NA, w = NA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(NA, w = NA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(x, w = wNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(x, w = wNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(xNA, w = wNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(xNA, w = wNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(m, w = wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(m, w = wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mNA, w = wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mNA, w = wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mtcars, w = wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mtcars, w = wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mtcNA, w = wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mtcNA, w = wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(x, f, wNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(x, f, wNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(xNA, f, wNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(xNA, f, wNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(m, g, wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(m, g, wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mNA, g, wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mNA, g, wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mtcars, g, wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mtcars, g, wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mtcNA, g, wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mtcNA, g, wdatNA), simplify = FALSE))) }) test_that("fsum handles special values in the right way", { expect_equal(fsum(NA), NA_real_) expect_equal(fsum(NaN), NaN) expect_equal(fsum(Inf), Inf) expect_equal(fsum(-Inf), -Inf) expect_equal(fsum(TRUE), 1) expect_equal(fsum(FALSE), 0) expect_equal(fsum(NA, na.rm = FALSE), NA_real_) expect_equal(fsum(NaN, na.rm = FALSE), NaN) expect_equal(fsum(Inf, na.rm = FALSE), Inf) expect_equal(fsum(-Inf, na.rm = FALSE), -Inf) expect_equal(fsum(TRUE, na.rm = FALSE), 1) expect_equal(fsum(FALSE, na.rm = FALSE), 0) expect_equal(fsum(c(1,NA)), 1) expect_equal(fsum(c(1,NaN)), 1) expect_equal(fsum(c(1,Inf)), Inf) expect_equal(fsum(c(1,-Inf)), -Inf) expect_equal(fsum(c(FALSE,TRUE)), 1) expect_equal(fsum(c(TRUE,TRUE)), 2) expect_equal(fsum(c(1,Inf), na.rm = FALSE), Inf) expect_equal(fsum(c(1,-Inf), na.rm = FALSE), -Inf) expect_equal(fsum(c(FALSE,TRUE), na.rm = FALSE), 1) expect_equal(fsum(c(TRUE,TRUE), na.rm = FALSE), 2) }) test_that("fsum with weights handles special values in the right way", { expect_equal(fsum(NA, w = 1), NA_real_) expect_equal(fsum(NaN, w = 1), NaN) expect_equal(fsum(Inf, w = 1), Inf) expect_equal(fsum(-Inf, w = 1), -Inf) expect_equal(fsum(TRUE, w = 1), 1) expect_equal(fsum(FALSE, w = 1), 0) expect_equal(fsum(NA, w = 1, na.rm = FALSE), NA_real_) expect_equal(fsum(NaN, w = 1, na.rm = FALSE), NaN) expect_equal(fsum(Inf, w = 1, na.rm = FALSE), Inf) expect_equal(fsum(-Inf, w = 1, na.rm = FALSE), -Inf) expect_equal(fsum(TRUE, w = 1, na.rm = FALSE), 1) expect_equal(fsum(FALSE, w = 1, na.rm = FALSE), 0) expect_equal(fsum(NA, w = NA), NA_real_) expect_equal(fsum(NaN, w = NA), NA_real_) expect_equal(fsum(Inf, w = NA), NA_real_) expect_equal(fsum(-Inf, w = NA), NA_real_) expect_equal(fsum(TRUE, w = NA), NA_real_) expect_equal(fsum(FALSE, w = NA), NA_real_) expect_equal(fsum(NA, w = NA, na.rm = FALSE), NA_real_) expect_equal(fsum(NaN, w = NA, na.rm = FALSE), NA_real_) expect_equal(fsum(Inf, w = NA, na.rm = FALSE), NA_real_) expect_equal(fsum(-Inf, w = NA, na.rm = FALSE), NA_real_) expect_equal(fsum(TRUE, w = NA, na.rm = FALSE), NA_real_) expect_equal(fsum(FALSE, w = NA, na.rm = FALSE), NA_real_) expect_equal(fsum(1:3, w = c(1,Inf,3)), Inf) expect_equal(fsum(1:3, w = c(1,-Inf,3)), -Inf) expect_equal(fsum(1:3, w = c(1,Inf,3), na.rm = FALSE), Inf) expect_equal(fsum(1:3, w = c(1,-Inf,3), na.rm = FALSE), -Inf) }) test_that("fsum produces errors for wrong input", { expect_error(fsum("a")) expect_error(fsum(NA_character_)) expect_error(fsum(mNAc)) expect_error(fsum(mNAc, f)) expect_error(fsum(1:2,1:3)) expect_error(fsum(m,1:31)) expect_error(fsum(mtcars,1:31)) expect_error(fsum(mtcars, w = 1:31)) expect_error(fsum("a", w = 1)) expect_error(fsum(1:2, w = 1:3)) expect_error(fsum(NA_character_, w = 1)) expect_error(fsum(mNAc, w = wdat)) expect_error(fsum(mNAc, f, wdat)) expect_error(fsum(mNA, w = 1:33)) expect_error(fsum(1:2,1:2, 1:3)) expect_error(fsum(m,1:32,1:20)) expect_error(fsum(mtcars,1:32,1:10)) expect_error(fsum(1:2, w = c("a","b"))) expect_error(fsum(wlddev)) expect_error(fsum(wlddev, w = wlddev$year)) expect_error(fsum(wlddev, wlddev$iso3c)) expect_error(fsum(wlddev, wlddev$iso3c, wlddev$year)) }) # Testing fsum with integers... x <- as.integer(x) xNA <- as.integer(xNA) mtcars <- dapply(mtcars, as.integer) mtcNA <- dapply(mtcNA, as.integer) storage.mode(m) <- "integer" storage.mode(mNA) <- "integer" toint <- function(x) { storage.mode(x) <- "integer" x } test_that("fsum with integers performs like base::sum and base::colSums", { expect_identical(fsum(x), bsum(x, na.rm = TRUE)) expect_identical(fsum(x, na.rm = FALSE), bsum(x)) expect_identical(fsum(xNA, na.rm = FALSE), bsum(xNA)) expect_identical(fsum(xNA), bsum(xNA, na.rm = TRUE)) expect_identical(toint(fsum(mtcars)), fsum(m)) expect_identical(fsum(m), toint(colSums(m, na.rm = TRUE))) expect_identical(fsum(m, na.rm = FALSE), toint(colSums(m))) expect_identical(fsum(mNA, na.rm = FALSE), toint(colSums(mNA))) expect_identical(fsum(mNA), toint(colSums(mNA, na.rm = TRUE))) expect_identical(toint(fsum(mtcars)), dapply(mtcars, bsum, na.rm = TRUE)) expect_identical(toint(fsum(mtcars, na.rm = FALSE)), dapply(mtcars, bsum)) expect_identical(toint(fsum(mtcNA, na.rm = FALSE)), dapply(mtcNA, bsum)) expect_identical(toint(fsum(mtcNA)), dapply(mtcNA, bsum, na.rm = TRUE)) expect_identical(fsum(x, f), BY(x, f, bsum, na.rm = TRUE)) expect_identical(fsum(x, f, na.rm = FALSE), BY(x, f, bsum)) expect_identical(fsum(xNA, f, na.rm = FALSE), BY(xNA, f, bsum)) expect_identical(na20(fsum(xNA, f)), BY(xNA, f, bsum, na.rm = TRUE)) expect_identical(fsum(m, g), BY(m, g, bsum, na.rm = TRUE)) expect_identical(fsum(m, g, na.rm = FALSE), BY(m, g, bsum)) expect_identical(fsum(mNA, g, na.rm = FALSE), BY(mNA, g, bsum)) expect_identical(na20(fsum(mNA, g)), BY(mNA, g, bsum, na.rm = TRUE)) # error, bsum(NA) give 0 expect_identical(fsum(mtcars, g), BY(mtcars, g, bsum, na.rm = TRUE)) expect_identical(fsum(mtcars, g, na.rm = FALSE), BY(mtcars, g, bsum)) expect_identical(fsum(mtcNA, g, na.rm = FALSE), BY(mtcNA, g, bsum)) expect_identical(na20(fsum(mtcNA, g)), BY(mtcNA, g, bsum, na.rm = TRUE)) # error, bsum(NA) give 0 }) test_that("fsum with integers and weights performs like wsum (defined above)", { # complete weights expect_equal(fsum(x, w = w), wsum(x, w)) expect_equal(fsum(x, w = w, na.rm = FALSE), wsum(x, w)) expect_equal(fsum(xNA, w = w, na.rm = FALSE), wsum(xNA, w)) expect_equal(fsum(xNA, w = w), wsum(xNA, w, na.rm = TRUE)) expect_equal(fsum(mtcars, w = wdat), fsum(m, w = wdat)) expect_equal(fsum(m, w = wdat), dapply(m, wsum, wdat, na.rm = TRUE)) expect_equal(fsum(m, w = wdat, na.rm = FALSE), dapply(m, wsum, wdat)) expect_equal(fsum(mNA, w = wdat, na.rm = FALSE), dapply(mNA, wsum, wdat)) expect_equal(fsum(mNA, w = wdat), dapply(mNA, wsum, wdat, na.rm = TRUE)) expect_equal(fsum(mtcars, w = wdat), dapply(mtcars, wsum, wdat, na.rm = TRUE)) expect_equal(fsum(mtcars, w = wdat, na.rm = FALSE), dapply(mtcars, wsum, wdat)) expect_equal(fsum(mtcNA, w = wdat, na.rm = FALSE), dapply(mtcNA, wsum, wdat)) expect_equal(fsum(mtcNA, w = wdat), dapply(mtcNA, wsum, wdat, na.rm = TRUE)) expect_equal(fsum(x, f, w), wBY(x, f, wsum, w)) expect_equal(fsum(x, f, w, na.rm = FALSE), wBY(x, f, wsum, w)) expect_equal(fsum(xNA, f, w, na.rm = FALSE), wBY(xNA, f, wsum, w)) expect_equal(fsum(xNA, f, w), wBY(xNA, f, wsum, w, na.rm = TRUE)) expect_equal(fsum(m, g, wdat), wBY(m, gf, wsum, wdat)) expect_equal(fsum(m, g, wdat, na.rm = FALSE), wBY(m, gf, wsum, wdat)) expect_equal(fsum(mNA, g, wdat, na.rm = FALSE), wBY(mNA, gf, wsum, wdat)) expect_equal(fsum(mNA, g, wdat), wBY(mNA, gf, wsum, wdat, na.rm = TRUE)) expect_equal(fsum(mtcars, g, wdat), wBY(mtcars, gf, wsum, wdat)) expect_equal(fsum(mtcars, g, wdat, na.rm = FALSE), wBY(mtcars, gf, wsum, wdat)) expect_equal(fsum(mtcNA, g, wdat, na.rm = FALSE), wBY(mtcNA, gf, wsum, wdat)) expect_equal(fsum(mtcNA, g, wdat), wBY(mtcNA, gf, wsum, wdat, na.rm = TRUE)) # missing weights expect_equal(fsum(x, w = wNA), wsum(x, wNA, na.rm = TRUE)) expect_equal(fsum(x, w = wNA, na.rm = FALSE), wsum(x, wNA)) expect_equal(fsum(xNA, w = wNA, na.rm = FALSE), wsum(xNA, wNA)) expect_equal(fsum(xNA, w = wNA), wsum(xNA, wNA, na.rm = TRUE)) expect_equal(fsum(mtcars, w = wdatNA), fsum(m, w = wdatNA)) expect_equal(fsum(m, w = wdatNA), dapply(m, wsum, wdatNA, na.rm = TRUE)) expect_equal(fsum(m, w = wdatNA, na.rm = FALSE), dapply(m, wsum, wdatNA)) expect_equal(fsum(mNA, w = wdatNA, na.rm = FALSE), dapply(mNA, wsum, wdatNA)) expect_equal(fsum(mNA, w = wdatNA), dapply(mNA, wsum, wdatNA, na.rm = TRUE)) expect_equal(fsum(mtcars, w = wdatNA), dapply(mtcars, wsum, wdatNA, na.rm = TRUE)) expect_equal(fsum(mtcars, w = wdatNA, na.rm = FALSE), dapply(mtcars, wsum, wdatNA)) expect_equal(fsum(mtcNA, w = wdatNA, na.rm = FALSE), dapply(mtcNA, wsum, wdatNA)) expect_equal(fsum(mtcNA, w = wdatNA), dapply(mtcNA, wsum, wdatNA, na.rm = TRUE)) expect_equal(fsum(x, f, wNA), wBY(x, f, wsum, wNA, na.rm = TRUE)) expect_equal(fsum(x, f, wNA, na.rm = FALSE), wBY(x, f, wsum, wNA)) expect_equal(fsum(xNA, f, wNA, na.rm = FALSE), wBY(xNA, f, wsum, wNA)) expect_equal(fsum(xNA, f, wNA), wBY(xNA, f, wsum, wNA, na.rm = TRUE)) expect_equal(fsum(m, g, wdatNA), wBY(m, gf, wsum, wdatNA, na.rm = TRUE)) expect_equal(fsum(m, g, wdatNA, na.rm = FALSE), wBY(m, gf, wsum, wdatNA)) expect_equal(fsum(mNA, g, wdatNA, na.rm = FALSE), wBY(mNA, gf, wsum, wdatNA)) expect_equal(fsum(mNA, g, wdatNA), wBY(mNA, gf, wsum, wdatNA, na.rm = TRUE)) expect_equal(fsum(mtcars, g, wdatNA), wBY(mtcars, gf, wsum, wdatNA, na.rm = TRUE)) expect_equal(fsum(mtcars, g, wdatNA, na.rm = FALSE), wBY(mtcars, gf, wsum, wdatNA)) expect_equal(fsum(mtcNA, g, wdatNA, na.rm = FALSE), wBY(mtcNA, gf, wsum, wdatNA)) expect_equal(fsum(mtcNA, g, wdatNA), wBY(mtcNA, gf, wsum, wdatNA, na.rm = TRUE)) }) test_that("fsum performs numerically stable", { expect_true(all_identical(replicate(50, fsum(x), simplify = FALSE))) expect_true(all_identical(replicate(50, fsum(x, na.rm = FALSE), simplify = FALSE))) expect_true(all_identical(replicate(50, fsum(xNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_identical(replicate(50, fsum(xNA), simplify = FALSE))) expect_true(all_identical(replicate(50, fsum(m), simplify = FALSE))) expect_true(all_identical(replicate(50, fsum(m, na.rm = FALSE), simplify = FALSE))) expect_true(all_identical(replicate(50, fsum(mNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_identical(replicate(50, fsum(mNA), simplify = FALSE))) expect_true(all_identical(replicate(50, fsum(mtcars), simplify = FALSE))) expect_true(all_identical(replicate(50, fsum(mtcars, na.rm = FALSE), simplify = FALSE))) expect_true(all_identical(replicate(50, fsum(mtcNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_identical(replicate(50, fsum(mtcNA), simplify = FALSE))) expect_true(all_identical(replicate(50, fsum(x, f), simplify = FALSE))) expect_true(all_identical(replicate(50, fsum(x, f, na.rm = FALSE), simplify = FALSE))) expect_true(all_identical(replicate(50, fsum(xNA, f, na.rm = FALSE), simplify = FALSE))) expect_true(all_identical(replicate(50, fsum(xNA, f), simplify = FALSE))) expect_true(all_identical(replicate(50, fsum(m, g), simplify = FALSE))) expect_true(all_identical(replicate(50, fsum(m, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_identical(replicate(50, fsum(mNA, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_identical(replicate(50, fsum(mNA, g), simplify = FALSE))) expect_true(all_identical(replicate(50, fsum(mtcars, g), simplify = FALSE))) expect_true(all_identical(replicate(50, fsum(mtcars, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_identical(replicate(50, fsum(mtcNA, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_identical(replicate(50, fsum(mtcNA, g), simplify = FALSE))) }) test_that("fsum with integers and complete weights performs numerically stable", { expect_true(all_obj_equal(replicate(50, fsum(x, w = w), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(x, w = w, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(xNA, w = w, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(xNA, w = w), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(m, w = wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(m, w = wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mNA, w = wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mNA, w = wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mtcars, w = wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mtcars, w = wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mtcNA, w = wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mtcNA, w = wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(x, f, w), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(x, f, w, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(xNA, f, w, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(xNA, f, w), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(m, g, wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(m, g, wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mNA, g, wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mNA, g, wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mtcars, g, wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mtcars, g, wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mtcNA, g, wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mtcNA, g, wdat), simplify = FALSE))) }) test_that("fsum with integers and missing weights performs numerically stable", { expect_true(all_obj_equal(replicate(50, fsum(x, w = wNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(x, w = wNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(xNA, w = wNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(xNA, w = wNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(m, w = wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(m, w = wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mNA, w = wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mNA, w = wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mtcars, w = wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mtcars, w = wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mtcNA, w = wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mtcNA, w = wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(x, f, wNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(x, f, wNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(xNA, f, wNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(xNA, f, wNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(m, g, wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(m, g, wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mNA, g, wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mNA, g, wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mtcars, g, wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mtcars, g, wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mtcNA, g, wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mtcNA, g, wdatNA), simplify = FALSE))) }) test_that("fsum with integers produces errors for wrong input", { expect_error(fsum(m,1:31)) expect_error(fsum(mtcars,1:31)) expect_error(fsum(mtcars, w = 1:31)) expect_error(fsum(mNA, w = 1:33)) expect_error(fsum(m,1:32,1:20)) expect_error(fsum(mtcars,1:32,1:10)) }) test_that("Miscellaneous Issues with Integers", { expect_identical(fsum(NA_integer_), NA_integer_) expect_identical(fsum(NA_integer_, na.rm = FALSE), NA_integer_) expect_identical(fsum(c(NA_integer_, NA_integer_)), NA_integer_) expect_identical(fsum(c(NA_integer_, NA_integer_), na.rm = FALSE), NA_integer_) expect_identical(fsum(c(NA_integer_, 1L)), 1L) expect_identical(fsum(c(NA_integer_, 1L), na.rm = FALSE), NA_integer_) expect_identical(fsum(c(-2147483646L, -2L)), -2147483648) expect_identical(fsum(c(-2147483646L, -2L), na.rm = FALSE), -2147483648) expect_identical(fsum(-c(-2147483646L, -2L)), 2147483648) expect_identical(fsum(-c(-2147483646L, -2L), na.rm = FALSE), 2147483648) }) x <- as.integer(wlddev$year*1000000L) set.seed(101) xNA <- na_insert(x) g <- wlddev$iso3c test_that("Integer overflow errors", { # With groups expect_error(fsum(x, g)) expect_error(fsum(x, g, na.rm = FALSE)) expect_error(fsum(xNA, g)) expect_error(fsum(xNA, g, na.rm = FALSE)) }) collapse/tests/testthat/test-seqid-groupid.R0000644000176200001440000001633514057472213020747 0ustar liggesuserscontext("seqid, groupid") # rm(list = ls()) x <- c(1:10, 1:10) test_that("seqid performas as expected", { expect_identical(unattrib(seqid(x)), rep(1:2, each = 10)) expect_identical(unattrib(seqid(x)), unattrib(seqid(x, na.skip = TRUE))) expect_identical(unattrib(seqid(c(1, NA, 3), na.skip = TRUE)), as.integer(c(1, NA, 2))) expect_identical(unattrib(seqid(c(1, NA, 2), na.skip = TRUE)), as.integer(c(1, NA, 1))) expect_identical(unattrib(seqid(c(1, NA, 3), na.skip = TRUE, skip.seq = TRUE)), as.integer(c(1, NA, 1))) expect_identical(unattrib(seqid(c(1, NA, 2), na.skip = TRUE, skip.seq = TRUE)), as.integer(c(1, NA, 2))) expect_identical(unattrib(seqid(x)), unattrib(seqid(x, na.skip = TRUE))) set.seed(101) xNA <- na_insert(x, prop = 0.15) expect_true(!anyNA(seqid(xNA))) expect_identical(is.na(seqid(xNA, na.skip = TRUE)), is.na(xNA)) xNA2 <- xNA xNA2[c(1,20)] <- NA_integer_ expect_true(!anyNA(seqid(xNA2))) expect_identical(is.na(seqid(xNA2, na.skip = TRUE)), is.na(xNA2)) # Start at 0 expect_equal(seqid(x, start = 0)[1], 0L) expect_equal(seqid(x, na.skip = TRUE, start = 0)[1], 0L) expect_identical(unclass(seqid(x, start = 0)), unclass(seqid(x, na.skip = TRUE, start = 0))) o <- order(rnorm(20)) xuo <- x[o] xNAuo <- xNA[o] xNA2uo <- xNA2[o] o <- order(o) expect_identical(x, xuo[o]) expect_identical(xNA, xNAuo[o]) expect_identical(xNA2, xNA2uo[o]) # seqid(xuo) # seqid(xuo, na.skip = TRUE) # seqid(xNAuo) # seqid(xNAuo, na.skip = TRUE) # seqid(xNA2uo) # seqid(xNA2uo, na.skip = TRUE) expect_identical(seqid(xuo, o)[o], unattrib(seqid(x))) expect_identical(seqid(xuo, o, na.skip = TRUE)[o], unattrib(seqid(x, na.skip = TRUE))) expect_identical(seqid(xNAuo, o)[o], unattrib(seqid(xNA))) expect_identical(seqid(xNAuo, o, na.skip = TRUE)[o], unattrib(seqid(xNA, na.skip = TRUE))) expect_identical(seqid(xNA2uo, o)[o], unattrib(seqid(xNA2))) expect_identical(seqid(xNA2uo, o, na.skip = TRUE)[o], unattrib(seqid(xNA2, na.skip = TRUE))) # Check o expect_identical(seqid(xuo, o, check.o = FALSE)[o], unattrib(seqid(x))) expect_identical(seqid(xuo, o, na.skip = TRUE, check.o = FALSE)[o], unattrib(seqid(x, na.skip = TRUE))) expect_identical(seqid(xNAuo, o, check.o = FALSE)[o], unattrib(seqid(xNA))) expect_identical(seqid(xNAuo, o, na.skip = TRUE, check.o = FALSE)[o], unattrib(seqid(xNA, na.skip = TRUE))) expect_identical(seqid(xNA2uo, o, check.o = FALSE)[o], unattrib(seqid(xNA2))) expect_identical(seqid(xNA2uo, o, na.skip = TRUE, check.o = FALSE)[o], unattrib(seqid(xNA2, na.skip = TRUE))) # Start at 0 expect_identical(seqid(xuo, o, start = 0)[o], unattrib(seqid(x, start = 0))) expect_identical(seqid(xuo, o, na.skip = TRUE, start = 0)[o], unattrib(seqid(x, na.skip = TRUE, start = 0))) expect_identical(seqid(xNAuo, o, start = 0)[o], unattrib(seqid(xNA, start = 0))) expect_identical(seqid(xNAuo, o, na.skip = TRUE, start = 0)[o], unattrib(seqid(xNA, na.skip = TRUE, start = 0))) expect_identical(seqid(xNA2uo, o, start = 0)[o], unattrib(seqid(xNA2, start = 0))) expect_identical(seqid(xNA2uo, o, na.skip = TRUE, start = 0)[o], unattrib(seqid(xNA2, na.skip = TRUE, start = 0))) # Check o, start at 0 expect_identical(seqid(xuo, o, check.o = FALSE, start = 0)[o], unattrib(seqid(x, start = 0))) expect_identical(seqid(xuo, o, na.skip = TRUE, check.o = FALSE, start = 0)[o], unattrib(seqid(x, na.skip = TRUE, start = 0))) expect_identical(seqid(xNAuo, o, check.o = FALSE, start = 0)[o], unattrib(seqid(xNA, start = 0))) expect_identical(seqid(xNAuo, o, na.skip = TRUE, check.o = FALSE, start = 0)[o], unattrib(seqid(xNA, na.skip = TRUE, start = 0))) expect_identical(seqid(xNA2uo, o, check.o = FALSE, start = 0)[o], unattrib(seqid(xNA2, start = 0))) expect_identical(seqid(xNA2uo, o, na.skip = TRUE, check.o = FALSE, start = 0)[o], unattrib(seqid(xNA2, na.skip = TRUE, start = 0))) }) # Testing groupid ----------------------- x <- rep(5:6, each = 10) test_that("groupid performas as expected", { # groupid(x) # groupid(x, na.skip = TRUE) set.seed(101) xNA <- na_insert(x, prop = 0.15) # groupid(xNA) # desirable behavior ?? # groupid(xNA, na.skip = TRUE) # -> Yes !! xNA2 <- xNA xNA2[c(1,20)] <- NA_integer_ # groupid(xNA2) # groupid(xNA2, na.skip = TRUE) # This was an issue !! expect_identical(groupid(c(NA,NA,1.343,NA,NA)), groupid(c(NA,NA,1L,NA,NA))) # Start at 0 # groupid(x, start = 0) # groupid(x, na.skip = TRUE, start = 0) # groupid(xNA, start = 0) # groupid(xNA, na.skip = TRUE, start = 0) # groupid(xNA2, start = 0) # groupid(xNA2, na.skip = TRUE, start = 0) o <- order(rnorm(20)) xuo <- x[o] xNAuo <- xNA[o] xNA2uo <- xNA2[o] o <- order(o) expect_identical(x, xuo[o]) expect_identical(xNA, xNAuo[o]) expect_identical(xNA2, xNA2uo[o]) # groupid(xuo) # groupid(xuo, na.skip = TRUE) # groupid(xNAuo) # groupid(xNAuo, na.skip = TRUE) # groupid(xNA2uo) # groupid(xNA2uo, na.skip = TRUE) expect_identical(groupid(xuo, o)[o], unattrib(groupid(x))) expect_identical(groupid(xuo, o, na.skip = TRUE)[o], unattrib(groupid(x, na.skip = TRUE))) expect_identical(groupid(xNAuo, o)[o], unattrib(groupid(xNA))) expect_identical(groupid(xNAuo, o, na.skip = TRUE)[o], unattrib(groupid(xNA, na.skip = TRUE))) expect_identical(groupid(xNA2uo, o)[o], unattrib(groupid(xNA2))) expect_identical(groupid(xNA2uo, o, na.skip = TRUE)[o], unattrib(groupid(xNA2, na.skip = TRUE))) # Check o expect_identical(groupid(xuo, o, check.o = FALSE)[o], unattrib(groupid(x))) expect_identical(groupid(xuo, o, na.skip = TRUE, check.o = FALSE)[o], unattrib(groupid(x, na.skip = TRUE))) expect_identical(groupid(xNAuo, o, check.o = FALSE)[o], unattrib(groupid(xNA))) expect_identical(groupid(xNAuo, o, na.skip = TRUE, check.o = FALSE)[o], unattrib(groupid(xNA, na.skip = TRUE))) expect_identical(groupid(xNA2uo, o, check.o = FALSE)[o], unattrib(groupid(xNA2))) expect_identical(groupid(xNA2uo, o, na.skip = TRUE, check.o = FALSE)[o], unattrib(groupid(xNA2, na.skip = TRUE))) # Start at 0 expect_identical(groupid(xuo, o, start = 0)[o], unattrib(groupid(x, start = 0))) expect_identical(groupid(xuo, o, na.skip = TRUE, start = 0)[o], unattrib(groupid(x, na.skip = TRUE, start = 0))) expect_identical(groupid(xNAuo, o, start = 0)[o], unattrib(groupid(xNA, start = 0))) expect_identical(groupid(xNAuo, o, na.skip = TRUE, start = 0)[o], unattrib(groupid(xNA, na.skip = TRUE, start = 0))) expect_identical(groupid(xNA2uo, o, start = 0)[o], unattrib(groupid(xNA2, start = 0))) expect_identical(groupid(xNA2uo, o, na.skip = TRUE, start = 0)[o], unattrib(groupid(xNA2, na.skip = TRUE, start = 0))) # Check o, start at 0 expect_identical(groupid(xuo, o, check.o = FALSE, start = 0)[o], unattrib(groupid(x, start = 0))) expect_identical(groupid(xuo, o, na.skip = TRUE, check.o = FALSE, start = 0)[o], unattrib(groupid(x, na.skip = TRUE, start = 0))) expect_identical(groupid(xNAuo, o, check.o = FALSE, start = 0)[o], unattrib(groupid(xNA, start = 0))) expect_identical(groupid(xNAuo, o, na.skip = TRUE, check.o = FALSE, start = 0)[o], unattrib(groupid(xNA, na.skip = TRUE, start = 0))) expect_identical(groupid(xNA2uo, o, check.o = FALSE, start = 0)[o], unattrib(groupid(xNA2, start = 0))) expect_identical(groupid(xNA2uo, o, na.skip = TRUE, check.o = FALSE, start = 0)[o], unattrib(groupid(xNA2, na.skip = TRUE, start = 0))) }) collapse/tests/testthat/test-fnobs-fndistinct.R0000644000176200001440000002334114167352423021442 0ustar liggesuserscontext("fnobs and fndistinct") # rm(list = ls()) set.seed(101) x <- rnorm(100) xNA <- x xNA[sample.int(100,20)] <- NA f <- as.factor(sample.int(10, 100, TRUE)) data <- fsubset(wlddev, iso3c %in% c("BLZ","IND","USA","SRB","GRL")) g <- GRP(droplevels(data$iso3c)) dataNA <- na_insert(data) m <- as.matrix(data) mNA <- as.matrix(dataNA) data$LC <- as.list(data$PCGDP) dataNA$LC <- lapply(na_insert(data["LC"])[[1]], function(x) if(is.na(x)) NULL else x) bsum <- base::sum Nobs <- function(x) if(is.list(x)) bsum(lengths(x) > 0L) else bsum(!is.na(x)) Ndistinct <- function(x, na.rm = FALSE) { if(na.rm) return(length(unique(x[!is.na(x)]))) return(length(unique(x))) } # fnobs test_that("fnobs performs like Nobs (defined above)", { expect_equal(fnobs(NA), as.double(Nobs(NA))) expect_equal(fnobs(1), Nobs(1)) expect_equal(fnobs(1:3), Nobs(1:3)) expect_equal(fnobs(-1:1), Nobs(-1:1)) expect_equal(fnobs(x), Nobs(x)) expect_equal(fnobs(xNA), Nobs(xNA)) expect_equal(fnobs(data[-length(data)]), fnobs(m)) expect_equal(fnobs(m), dapply(m, Nobs)) expect_equal(fnobs(mNA), dapply(mNA, Nobs)) expect_equal(fnobs(x, f), BY(x, f, Nobs)) expect_equal(fnobs(xNA, f), BY(xNA, f, Nobs)) expect_equal(fnobs(m, g), BY(m, g, Nobs)) expect_equal(fnobs(mNA, g), BY(mNA, g, Nobs)) expect_equal(fnobs(data, g), BY(data, g, Nobs)) expect_equal(fnobs(dataNA, g), BY(dataNA, g, Nobs)) }) test_that("fnobs performs numerically stable", { expect_true(all_obj_equal(replicate(50, fnobs(1), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fnobs(NA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fnobs(x), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fnobs(xNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fnobs(m), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fnobs(mNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fnobs(data), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fnobs(dataNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fnobs(x, f), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fnobs(xNA, f), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fnobs(m, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fnobs(mNA, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fnobs(data, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fnobs(dataNA, g), simplify = FALSE))) }) test_that("fnobs handles special values in the right way", { expect_equal(fnobs(NA), 0) expect_equal(fnobs(NaN), 0) expect_equal(fnobs(Inf), 1) expect_equal(fnobs(-Inf), 1) expect_equal(fnobs(TRUE), 1) expect_equal(fnobs(FALSE), 1) }) test_that("fnobs produces errors for wrong input", { expect_visible(fnobs("a")) expect_visible(fnobs(NA_character_)) expect_visible(fnobs(mNA)) expect_visible(fnobs(mNA, g)) expect_error(fnobs(1:2,1:3)) expect_error(fnobs(m,1:31)) expect_error(fnobs(m, 1)) expect_error(fnobs(data,1:31)) expect_visible(fnobs(wlddev)) expect_visible(fnobs(wlddev, wlddev$iso3c)) }) data$LC <- NULL dataNA$LC <- NULL # fndistinct test_that("fndistinct performs like Ndistinct (defined above)", { expect_equal(fndistinct(NA), 0) expect_equal(fndistinct(NA, na.rm = FALSE), 1) expect_equal(fndistinct(1), Ndistinct(1, na.rm = TRUE)) expect_equal(fndistinct(1:3), Ndistinct(1:3, na.rm = TRUE)) expect_equal(fndistinct(-1:1), Ndistinct(-1:1, na.rm = TRUE)) expect_equal(fndistinct(1, na.rm = FALSE), Ndistinct(1)) expect_equal(fndistinct(1:3, na.rm = FALSE), Ndistinct(1:3)) expect_equal(fndistinct(-1:1, na.rm = FALSE), Ndistinct(-1:1)) expect_equal(fndistinct(x), Ndistinct(x, na.rm = TRUE)) expect_equal(fndistinct(x, na.rm = FALSE), Ndistinct(x)) expect_equal(fndistinct(xNA, na.rm = FALSE), Ndistinct(xNA)) expect_equal(fndistinct(xNA), Ndistinct(xNA, na.rm = TRUE)) expect_equal(fndistinct(data), fndistinct(m)) expect_equal(fndistinct(m), dapply(m, Ndistinct, na.rm = TRUE)) expect_equal(fndistinct(m, na.rm = FALSE), dapply(m, Ndistinct)) expect_equal(fndistinct(mNA, na.rm = FALSE), dapply(mNA, Ndistinct)) expect_equal(fndistinct(mNA), dapply(mNA, Ndistinct, na.rm = TRUE)) expect_equal(fndistinct(x, f), BY(x, f, Ndistinct, na.rm = TRUE)) expect_equal(fndistinct(x, f, na.rm = FALSE), BY(x, f, Ndistinct)) expect_equal(fndistinct(xNA, f, na.rm = FALSE), BY(xNA, f, Ndistinct)) expect_equal(fndistinct(xNA, f), BY(xNA, f, Ndistinct, na.rm = TRUE)) expect_equal(fndistinct(m, g), BY(m, g, Ndistinct, na.rm = TRUE)) expect_equal(fndistinct(m, g, na.rm = FALSE), BY(m, g, Ndistinct)) expect_equal(fndistinct(mNA, g, na.rm = FALSE), BY(mNA, g, Ndistinct)) expect_equal(fndistinct(mNA, g), BY(mNA, g, Ndistinct, na.rm = TRUE)) expect_equal(fndistinct(data, g), BY(data, g, Ndistinct, na.rm = TRUE)) expect_equal(fndistinct(data, g, na.rm = FALSE), BY(data, g, Ndistinct)) expect_equal(fndistinct(dataNA, g, na.rm = FALSE), BY(dataNA, g, Ndistinct)) expect_equal(fndistinct(dataNA, g), BY(dataNA, g, Ndistinct, na.rm = TRUE)) }) test_that("fndistinct performs numerically stable", { expect_true(all_obj_equal(replicate(50, fndistinct(1), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fndistinct(NA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fndistinct(NA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fndistinct(x), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fndistinct(x, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fndistinct(xNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fndistinct(xNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fndistinct(m), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fndistinct(m, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fndistinct(mNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fndistinct(mNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fndistinct(data), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fndistinct(data, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fndistinct(dataNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fndistinct(dataNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fndistinct(x, f), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fndistinct(x, f, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fndistinct(xNA, f, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fndistinct(xNA, f), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fndistinct(m, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fndistinct(m, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fndistinct(mNA, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fndistinct(mNA, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fndistinct(data, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fndistinct(data, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fndistinct(dataNA, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fndistinct(dataNA, g), simplify = FALSE))) }) test_that("fndistinct handles special values in the right way", { expect_equal(fndistinct(NA), 0) expect_equal(fndistinct(NaN), 0) expect_equal(fndistinct(Inf), 1) expect_equal(fndistinct(-Inf), 1) expect_equal(fndistinct(TRUE), 1) expect_equal(fndistinct(FALSE), 1) expect_equal(fndistinct(c(TRUE,TRUE)), 1) expect_equal(fndistinct(c(TRUE,FALSE)), 2) expect_equal(fndistinct(c(FALSE,TRUE)), 2) expect_equal(fndistinct(c(FALSE,FALSE)), 1) expect_equal(fndistinct(c(NA,TRUE,TRUE,NA)), 1) expect_equal(fndistinct(c(NA,TRUE,FALSE,NA)), 2) expect_equal(fndistinct(c(NA,FALSE,TRUE,NA)), 2) expect_equal(fndistinct(c(NA,FALSE,FALSE,NA)), 1) # expect_equal(max(fndistinct(mNA > 10)), 1) # These tests are insecure to random number generation # expect_equal(max(fndistinct(mNA > 10, g)), 1) expect_equal(fndistinct(NA, na.rm = FALSE), 1) expect_equal(fndistinct(NaN, na.rm = FALSE), 1) expect_equal(fndistinct(Inf, na.rm = FALSE), 1) expect_equal(fndistinct(-Inf, na.rm = FALSE), 1) expect_equal(fndistinct(TRUE, na.rm = FALSE), 1) expect_equal(fndistinct(FALSE, na.rm = FALSE), 1) expect_equal(fndistinct(c(TRUE,TRUE), na.rm = FALSE), 1) expect_equal(fndistinct(c(TRUE,FALSE), na.rm = FALSE), 2) expect_equal(fndistinct(c(FALSE,TRUE), na.rm = FALSE), 2) expect_equal(fndistinct(c(FALSE,FALSE), na.rm = FALSE), 1) expect_equal(fndistinct(c(NA,TRUE,TRUE,NA), na.rm = FALSE), 2) expect_equal(fndistinct(c(NA,TRUE,FALSE,NA), na.rm = FALSE), 3) expect_equal(fndistinct(c(NA,FALSE,TRUE,NA), na.rm = FALSE), 3) expect_equal(fndistinct(c(NA,FALSE,FALSE,NA), na.rm = FALSE), 2) # expect_equal(max(fndistinct(mNA > 10, na.rm = FALSE)), 2) # expect_equal(max(fndistinct(mNA > 10, g, na.rm = FALSE)), 2) }) test_that("fndistinct produces errors for wrong input", { expect_visible(fndistinct("a")) expect_visible(fndistinct(NA_character_)) expect_visible(fndistinct(mNA)) expect_visible(fndistinct(mNA, g)) expect_error(fndistinct(1:2,1:3)) expect_error(fndistinct(m,1:31)) expect_error(fndistinct(m, 1)) expect_error(fndistinct(data,1:31)) expect_visible(fndistinct(wlddev)) expect_visible(fndistinct(wlddev, wlddev$iso3c)) }) collapse/tests/testthat/test-varying.R0000644000176200001440000003023314066133206017637 0ustar liggesuserscontext("varying") # rm(list = ls()) if(identical(Sys.getenv("NCRAN"), "TRUE")) pwlddev <- eval(parse(text = paste0("plm", ":", ":", "pdata.frame(wlddev, index = c('iso3c', 'year'))"))) gwlddev <- fgroup_by(wlddev, iso3c) wdm <- qM(`cat_vars<-`(wlddev, dapply(cat_vars(wlddev), qG))) g <- GRP(wlddev, ~ region + year) test_that("vector, matrix and data.frame methods work as intended", { expect_true(all(dapply(wlddev, varying))) expect_true(all(varying(wlddev))) expect_true(all(varying(wdm))) expect_true(is.atomic(varying(wlddev, drop = TRUE))) expect_true(is.atomic(varying(wdm, drop = TRUE))) expect_true(is.data.frame(varying(wlddev, drop = FALSE))) expect_true(is.matrix(varying(wdm, drop = FALSE))) expect_true(all_identical(dapply(wlddev, varying), varying(wlddev), varying(wdm))) expect_true(all_identical(dapply(wlddev, varying, drop = FALSE), varying(wlddev, drop = FALSE), qDF(varying(wdm, drop = FALSE)))) expect_equal(dapply(unattrib(wlddev), varying, wlddev$iso3c), c(FALSE,FALSE,TRUE,TRUE,TRUE,FALSE,FALSE,FALSE,TRUE,TRUE,TRUE,TRUE,TRUE)) expect_true(all_identical(dapply(wlddev, varying, wlddev$iso3c), varying(wlddev, wlddev$iso3c), varying(wdm, wlddev$iso3c))) expect_true(all_identical(dapply(wlddev, varying, wlddev$iso3c, drop = FALSE), varying(wlddev, wlddev$iso3c, drop = FALSE), qDF(varying(wdm, wlddev$iso3c, drop = FALSE)))) expect_true(all_identical(qM(dapply(wlddev, varying, wlddev$iso3c, any_group = FALSE)), qM(varying(wlddev, wlddev$iso3c, any_group = FALSE)), varying(wdm, wlddev$iso3c, any_group = FALSE))) expect_true(all_identical(qM(dapply(wlddev, varying, wlddev$iso3c, any_group = FALSE, drop = FALSE)), qM(varying(wlddev, wlddev$iso3c, any_group = FALSE, drop = FALSE)), varying(wdm, wlddev$iso3c, any_group = FALSE, drop = FALSE))) expect_true(all_identical(qM(dapply(wlddev, varying, wlddev$iso3c, any_group = FALSE, use.g.names = FALSE)), qM(varying(wlddev, wlddev$iso3c, any_group = FALSE, use.g.names = FALSE)), varying(wdm, wlddev$iso3c, any_group = FALSE, use.g.names = FALSE))) expect_true(all_identical(qM(dapply(wlddev, varying, wlddev$iso3c, any_group = FALSE, use.g.names = FALSE, drop = FALSE)), qM(varying(wlddev, wlddev$iso3c, any_group = FALSE, use.g.names = FALSE, drop = FALSE)), varying(wdm, wlddev$iso3c, any_group = FALSE, use.g.names = FALSE, drop = FALSE))) # With grouping objects... expect_equal(dapply(unattrib(wlddev), varying, g), c(TRUE,TRUE,FALSE,FALSE,FALSE,FALSE,TRUE,TRUE,TRUE,TRUE,TRUE,TRUE,TRUE)) expect_true(all_identical(dapply(wlddev, varying, g), varying(wlddev, g), varying(wdm, g))) expect_true(all_identical(dapply(wlddev, varying, g, drop = FALSE), varying(wlddev, g, drop = FALSE), qDF(varying(wdm, g, drop = FALSE)))) expect_true(all_identical(qM(dapply(wlddev, varying, g, any_group = FALSE)), qM(varying(wlddev, g, any_group = FALSE)), varying(wdm, g, any_group = FALSE))) expect_true(all_identical(qM(dapply(wlddev, varying, g, any_group = FALSE, drop = FALSE)), qM(varying(wlddev, g, any_group = FALSE, drop = FALSE)), varying(wdm, g, any_group = FALSE, drop = FALSE))) expect_true(all_identical(qM(dapply(wlddev, varying, g, any_group = FALSE, use.g.names = FALSE)), qM(varying(wlddev, g, any_group = FALSE, use.g.names = FALSE)), varying(wdm, g, any_group = FALSE, use.g.names = FALSE))) expect_true(all_identical(qM(dapply(wlddev, varying, g, any_group = FALSE, use.g.names = FALSE, drop = FALSE)), qM(varying(wlddev, g, any_group = FALSE, use.g.names = FALSE, drop = FALSE)), varying(wdm, g, any_group = FALSE, use.g.names = FALSE, drop = FALSE))) }) test_that("data.frame method formula and cols work as intended", { expect_equal(varying(wlddev, cols = 2:5), varying(get_vars(wlddev, 2:5))) expect_equal(varying(wlddev, cols = c("PCGDP","country")), varying(get_vars(wlddev, c("PCGDP","country")))) expect_equal(varying(wlddev, cols = is.numeric), varying(num_vars(wlddev))) expect_equal(varying(wlddev, ~iso3c), varying(fselect(wlddev, -iso3c), wlddev$iso3c)) expect_equal(varying(wlddev, PCGDP + country ~ iso3c), varying(fselect(wlddev, PCGDP, country), wlddev$iso3c)) expect_equal(varying(wlddev, PCGDP + country ~ iso3c), varying(wlddev, ~ iso3c, cols = c("PCGDP", "country"))) expect_equal(varying(wlddev, ~iso3c, any_group = FALSE), varying(fselect(wlddev, -iso3c), wlddev$iso3c, any_group = FALSE)) expect_equal(varying(wlddev, PCGDP + country ~ iso3c, any_group = FALSE), varying(fselect(wlddev, PCGDP, country), wlddev$iso3c, any_group = FALSE)) expect_equal(varying(wlddev, PCGDP + country ~ iso3c, any_group = FALSE), varying(wlddev, ~ iso3c, cols = c("PCGDP", "country"), any_group = FALSE)) expect_equal(varying(wlddev, ~region + year), varying(fselect(wlddev, -region, -year), g)) expect_equal(varying(wlddev, PCGDP + country ~ region + year), varying(fselect(wlddev, PCGDP, country), g)) expect_equal(varying(wlddev, PCGDP + country ~ region + year), varying(wlddev, ~ region + year, cols = c("PCGDP", "country"))) expect_equal(varying(wlddev, ~region + year, any_group = FALSE), varying(fselect(wlddev, -region, -year),g, any_group = FALSE)) expect_equal(varying(wlddev, PCGDP + country ~ region + year, any_group = FALSE), varying(fselect(wlddev, PCGDP, country), g, any_group = FALSE)) expect_equal(varying(wlddev, PCGDP + country ~ region + year, any_group = FALSE), varying(wlddev, ~ region + year, cols = c("PCGDP", "country"), any_group = FALSE)) expect_error(varying(wlddev, ~ iso3c2)) expect_error(varying(wlddev, PCGDP + country ~ iso3c2)) expect_error(varying(wlddev, PCGDP + country2 ~ iso3c)) expect_error(varying(wlddev, ~ iso3c, cols = c("PCGDP", "country2"))) expect_error(varying(wlddev, ~ region2 + year)) expect_error(varying(wlddev, PCGDP + country ~ region2 + year)) expect_error(varying(wlddev, PCGDP + country2 ~ region3 + year)) expect_error(varying(wlddev, ~ region + year, cols = c("PCGDP", "country2"))) }) if(identical(Sys.getenv("NCRAN"), "TRUE")) { test_that("pseries and pdata.frame methods work as intended", { # pdata.frame expect_equal(unattrib(varying(pwlddev)), c(FALSE,TRUE,TRUE,TRUE,FALSE,FALSE,FALSE,TRUE,TRUE,TRUE,TRUE,TRUE)) expect_equal(unattrib(varying(pwlddev, effect = "iso3c")), c(FALSE,TRUE,TRUE,TRUE,FALSE,FALSE,FALSE,TRUE,TRUE,TRUE,TRUE,TRUE)) expect_equal(unattrib(varying(pwlddev, effect = 2L)), c(TRUE,TRUE,FALSE,FALSE,TRUE,TRUE,TRUE,TRUE,TRUE,TRUE,TRUE,TRUE)) expect_equal(unattrib(varying(pwlddev, effect = "year")), c(TRUE,TRUE,FALSE,FALSE,TRUE,TRUE,TRUE,TRUE,TRUE,TRUE,TRUE,TRUE)) expect_true(is.atomic(varying(pwlddev, drop = TRUE))) expect_true(is.data.frame(varying(pwlddev, drop = FALSE))) expect_true(is.data.frame(varying(pwlddev, any_group = FALSE))) atrapply <- function(X, FUN, ...) { res <- vector("list", fncol(X)) for(i in seq_col(X)) { res[[i]] <- FUN(X[[i]], ...) } res } # Making sure fselect and get_vars etc. work properly. expect_identical(attributes(fselect(pwlddev, country:POP)), attributes(pwlddev)) expect_identical(attributes(get_vars(pwlddev, seq_col(pwlddev))), attributes(pwlddev)) # pseries expect_equal(unlist(atrapply(fselect(pwlddev, -iso3c), varying)), c(FALSE,TRUE,TRUE,TRUE,FALSE,FALSE,FALSE,TRUE,TRUE,TRUE,TRUE,TRUE)) expect_equal(unlist(atrapply(fselect(pwlddev, -iso3c), varying, effect = "iso3c")), c(FALSE,TRUE,TRUE,TRUE,FALSE,FALSE,FALSE,TRUE,TRUE,TRUE,TRUE,TRUE)) expect_equal(unlist(atrapply(fselect(pwlddev, -year), varying, effect = 2L)), c(TRUE,TRUE,FALSE,FALSE,TRUE,TRUE,TRUE,TRUE,TRUE,TRUE,TRUE,TRUE)) expect_equal(unlist(atrapply(fselect(pwlddev, -year), varying, effect = "year")), c(TRUE,TRUE,FALSE,FALSE,TRUE,TRUE,TRUE,TRUE,TRUE,TRUE,TRUE,TRUE)) expect_equal(varying(pwlddev$PCGDP), varying(wlddev$PCGDP, wlddev$iso3c)) expect_equal(varying(pwlddev$PCGDP, any_group = FALSE), varying(wlddev$PCGDP, wlddev$iso3c, any_group = FALSE)) expect_equal(varying(pwlddev$PCGDP, any_group = FALSE, use.g.names = FALSE), varying(wlddev$PCGDP, wlddev$iso3c, any_group = FALSE, use.g.names = FALSE)) expect_equal(lengths(varying(pwlddev, any_group = FALSE), FALSE), lengths(atrapply(fselect(pwlddev, -iso3c), varying, any_group = FALSE))) # pdata.frame works like data.frame expect_identical(unattrib(varying(fselect(wlddev, -iso3c), wlddev$iso3c, any_group = FALSE)), unattrib(varying(pwlddev, any_group = FALSE))) expect_identical(unattrib(varying(fselect(wlddev, -iso3c), wlddev$iso3c, any_group = FALSE, drop = FALSE)), unattrib(varying(pwlddev, any_group = FALSE, drop = FALSE))) expect_identical(unattrib(varying(fselect(wlddev, -iso3c), wlddev$iso3c, any_group = FALSE, use.g.names = FALSE)), unattrib(varying(pwlddev, any_group = FALSE, use.g.names = FALSE))) expect_identical(unattrib(varying(fselect(wlddev, -iso3c), wlddev$iso3c, any_group = FALSE, use.g.names = FALSE, drop = FALSE)), unattrib(varying(pwlddev, any_group = FALSE, use.g.names = FALSE, drop = FALSE))) expect_identical(unattrib(varying(fselect(wlddev, -year), wlddev$year, any_group = FALSE)), unattrib(varying(pwlddev, any_group = FALSE, effect = "year"))) expect_identical(unattrib(varying(fselect(wlddev, -year), wlddev$year, any_group = FALSE, drop = FALSE)), unattrib(varying(pwlddev, any_group = FALSE, drop = FALSE, effect = "year"))) expect_identical(unattrib(varying(fselect(wlddev, -year), wlddev$year, any_group = FALSE, use.g.names = FALSE)), unattrib(varying(pwlddev, any_group = FALSE, use.g.names = FALSE, effect = "year"))) expect_identical(unattrib(varying(fselect(wlddev, -year), wlddev$year, any_group = FALSE, use.g.names = FALSE, drop = FALSE)), unattrib(varying(pwlddev, any_group = FALSE, use.g.names = FALSE, drop = FALSE, effect = "year"))) }) } test_that("grouped_df method works as intended", { expect_equal(unattrib(varying(gwlddev)), c(FALSE,TRUE,TRUE,TRUE,FALSE,FALSE,FALSE,TRUE,TRUE,TRUE,TRUE,TRUE)) expect_true(is.atomic(varying(gwlddev, drop = TRUE))) expect_true(is.data.frame(varying(gwlddev, drop = FALSE))) expect_true(is.data.frame(varying(gwlddev, any_group = FALSE))) expect_identical(names(varying(gwlddev)), names(wlddev)[-2L]) expect_identical(names(varying(get_vars(gwlddev, 9:12))), names(wlddev)[9:12]) expect_identical(names(varying(gwlddev, any_group = FALSE)), c("iso3c", names(wlddev)[-2L])) expect_identical(names(varying(gwlddev, any_group = FALSE, keep.group_vars = FALSE)), names(wlddev)[-2L]) expect_identical(names(varying(get_vars(gwlddev, 9:12), any_group = FALSE)), c("iso3c", names(wlddev)[9:12])) expect_identical(names(varying(get_vars(gwlddev, 9:12), any_group = FALSE, keep.group_vars = FALSE)), names(wlddev)[9:12]) # grouped_df works like data.frame expect_identical(unattrib(varying(fselect(wlddev, -iso3c), wlddev$iso3c, any_group = FALSE)), unattrib(varying(gwlddev, any_group = FALSE, keep.group_vars = FALSE))) expect_identical(unattrib(varying(fselect(wlddev, -iso3c), wlddev$iso3c, any_group = FALSE, drop = FALSE)), unattrib(varying(gwlddev, any_group = FALSE, drop = FALSE, keep.group_vars = FALSE))) expect_identical(unclass(varying(fselect(wlddev, -iso3c), wlddev$iso3c, any_group = FALSE, use.g.names = FALSE)), unclass(fungroup(varying(gwlddev, any_group = FALSE, use.g.names = FALSE, keep.group_vars = FALSE)))) expect_identical(unclass(varying(fselect(wlddev, -iso3c), wlddev$iso3c, any_group = FALSE, drop = FALSE)), unclass(fungroup(varying(gwlddev, any_group = FALSE, use.g.names = TRUE, drop = FALSE, keep.group_vars = FALSE)))) }) collapse/tests/testthat/test-fHDbetween-fHDwithin-HDB-HDW.R0000644000176200001440000016135514201327077023104 0ustar liggesuserscontext("fhdbetween / HDB and fhdwithin / HDW") # rm(list = ls()) # TODO: Sort out why certain tests fail... failtests = FALSE options(warn = -1) set.seed(101) x <- rnorm(100) w <- abs(100*rnorm(100)) wdat <- abs(100*rnorm(32)) xNA <- x wNA <- w wdatNA <- wdat xNA[sample.int(100,20)] <- NA wNA[sample.int(100,20)] <- NA wdatNA[sample.int(32, 5)] <- NA f <- as.factor(rep(1:10, each = 10)) g <- as.factor(rep(c(1,2,2,3,3,3,4,4,4,4,5,5,5,5,5,7,7,7,7,7,7,7,10,10,10,10,10,10,10,10,10,10))) mtcNA <- na_insert(mtcars) mtcNA[1,1] <- NA # single group NA !! m <- as.matrix(mtcars) mNA <- as.matrix(mtcNA) mNAc <- mNA storage.mode(mNAc) <- "character" baseresid <- function(y, X, na.rm = FALSE) { y <- qM(y) if(is.list(X)) X <- do.call(cbind, X) X <- cbind(Intercept = 1L, X) if(na.rm) { cc <- complete.cases(y, X) y <- y[cc, , drop = FALSE] X <- X[cc, , drop = FALSE] } drop(qr.resid(qr.default(X), y)) } basefitted <- function(y, X, na.rm = FALSE) { y <- qM(y) if(is.list(X)) X <- do.call(cbind, X) X <- cbind(Intercept = 1L, X) if(na.rm) { cc <- complete.cases(y, X) y <- y[cc, , drop = FALSE] X <- X[cc, , drop = FALSE] } drop(qr.fitted(qr.default(X), y)) } # fhdbetween and fhdwithin test_that("fhdbetween with one factor performs like fbetween", { expect_equal(fhdbetween(x, f), fbetween(x, f)) expect_equal(fhdbetween(x, f, na.rm = FALSE), fbetween(x, f, na.rm = FALSE)) expect_equal(fhdbetween(xNA, f, na.rm = FALSE), fbetween(xNA, f, na.rm = FALSE)) expect_equal(`attributes<-`(fhdbetween(xNA, f, fill = TRUE), NULL), fbetween(xNA, f)) expect_equal(fhdbetween(m, g), fbetween(m, g)) expect_equal(fhdbetween(m, g, na.rm = FALSE), fbetween(m, g, na.rm = FALSE)) expect_equal(fhdbetween(mNA, g, na.rm = FALSE), fbetween(mNA, g, na.rm = FALSE)) # expect_equal(fhdbetween(mNA, g, fill = TRUE), fbetween(mNA, g)) # not matching, fhdbetween matrix is not variable.wise expect_equal(fhdbetween(mtcars, g), fbetween(mtcars, g)) expect_equal(fhdbetween(mtcars, g, na.rm = FALSE), fbetween(mtcars, g, na.rm = FALSE)) expect_equal(fhdbetween(mtcNA, g, na.rm = FALSE), fbetween(mtcNA, g, na.rm = FALSE)) expect_equal(fhdbetween(mtcNA, g, variable.wise = TRUE), fbetween(mtcNA, g)) # with weights expect_equal(fhdbetween(x, f, w), fbetween(x, f, w)) expect_equal(fhdbetween(x, f, w, na.rm = FALSE), fbetween(x, f, w, na.rm = FALSE)) expect_equal(fhdbetween(xNA, f, w, na.rm = FALSE), fbetween(xNA, f, w, na.rm = FALSE)) expect_equal(`attributes<-`(fhdbetween(xNA, f, w, fill = TRUE), NULL), fbetween(xNA, f, w)) expect_equal(fhdbetween(m, g, wdat), fbetween(m, g, wdat)) expect_equal(fhdbetween(m, g, wdat, na.rm = FALSE), fbetween(m, g, wdat, na.rm = FALSE)) expect_equal(fhdbetween(mNA, g, wdat, na.rm = FALSE), fbetween(mNA, g, wdat, na.rm = FALSE)) # expect_equal(fhdbetween(mNA, g, fill = TRUE), fbetween(mNA, g)) # not matching, fhdbetween matrix is not variable.wise expect_equal(fhdbetween(mtcars, g, wdat), fbetween(mtcars, g, wdat)) expect_equal(fhdbetween(mtcars, g, wdat, na.rm = FALSE), fbetween(mtcars, g, wdat, na.rm = FALSE)) expect_equal(fhdbetween(mtcNA, g, wdat, na.rm = FALSE), fbetween(mtcNA, g, wdat, na.rm = FALSE)) expect_equal(fhdbetween(mtcNA, g, wdat, variable.wise = TRUE), fbetween(mtcNA, g, wdat)) }) test_that("fhdwithin with one factor performs like fwithin", { expect_equal(fhdwithin(x, f), fwithin(x, f)) expect_equal(fhdwithin(x, f, na.rm = FALSE), fwithin(x, f, na.rm = FALSE)) expect_equal(fhdwithin(xNA, f, na.rm = FALSE), fwithin(xNA, f, na.rm = FALSE)) expect_equal(`attributes<-`(fhdwithin(xNA, f, fill = TRUE), NULL), fwithin(xNA, f)) expect_equal(fhdwithin(m, g), fwithin(m, g)) expect_equal(fhdwithin(m, g, na.rm = FALSE), fwithin(m, g, na.rm = FALSE)) expect_equal(fhdwithin(mNA, g, na.rm = FALSE), fwithin(mNA, g, na.rm = FALSE)) # expect_equal(fhdwithin(mNA, g, fill = TRUE), fwithin(mNA, g)) # not matching, fhdwithin matrix is not variable.wise expect_equal(fhdwithin(mtcars, g), fwithin(mtcars, g)) expect_equal(fhdwithin(mtcars, g, na.rm = FALSE), fwithin(mtcars, g, na.rm = FALSE)) expect_equal(fhdwithin(mtcNA, g, na.rm = FALSE), fwithin(mtcNA, g, na.rm = FALSE)) expect_equal(fhdwithin(mtcNA, g, variable.wise = TRUE), fwithin(mtcNA, g)) # with weights expect_equal(fhdwithin(x, f, w), fwithin(x, f, w)) expect_equal(fhdwithin(x, f, w, na.rm = FALSE), fwithin(x, f, w, na.rm = FALSE)) expect_equal(fhdwithin(xNA, f, w, na.rm = FALSE), fwithin(xNA, f, w, na.rm = FALSE)) expect_equal(`attributes<-`(fhdwithin(xNA, f, w, fill = TRUE), NULL), fwithin(xNA, f, w)) expect_equal(fhdwithin(m, g, wdat), fwithin(m, g, wdat)) expect_equal(fhdwithin(m, g, wdat, na.rm = FALSE), fwithin(m, g, wdat, na.rm = FALSE)) expect_equal(fhdwithin(mNA, g, wdat, na.rm = FALSE), fwithin(mNA, g, wdat, na.rm = FALSE)) # expect_equal(fhdwithin(mNA, g, wdat, fill = TRUE), fwithin(mNA, g)) # not matching, wdat, fhdwithin matrix is not variable.wise expect_equal(fhdwithin(mtcars, g, wdat), fwithin(mtcars, g, wdat)) expect_equal(fhdwithin(mtcars, g, wdat, na.rm = FALSE), fwithin(mtcars, g, wdat, na.rm = FALSE)) expect_equal(fhdwithin(mtcNA, g, wdat, na.rm = FALSE), fwithin(mtcNA, g, wdat, na.rm = FALSE)) expect_equal(fhdwithin(mtcNA, g, wdat, variable.wise = TRUE), fwithin(mtcNA, g, wdat)) }) set.seed(101) f2 <- qF(sample.int(10, 100, TRUE)) fl <- list(f, f2) g2 <- qF(sample.int(5, 32, TRUE)) gl <- list(g, g2) # This is to fool very silly checks on CRAN scanning the code of the tests if(identical(Sys.getenv("LOCAL"), "TRUE")) demeanlist <- eval(parse(text = paste0("lfe", ":", ":", "demeanlist"))) tol <- if(identical(Sys.getenv("LOCAL"), "TRUE")) 1e-5 else 1e-4 demean <- fixest::demean # eval(parse(text = paste0("fixest", ":", ":", "demean"))) # lfe is back on CRAN: This now also seems to produce a warning !!!!!!! if(identical(Sys.getenv("LOCAL"), "TRUE")) test_that("fhdbetween with two factors performs like demeanlist", { expect_equal(fhdbetween(x, fl), demeanlist(x, fl, means = TRUE), tolerance = tol) expect_equal(fhdbetween(xNA, fl), demeanlist(xNA, fl, means = TRUE, na.rm = TRUE), tolerance = tol) expect_visible(fhdbetween(xNA, fl, fill = TRUE)) expect_equal(fhdbetween(m, gl), demeanlist(m, gl, means = TRUE), tolerance = tol) expect_equal(fhdbetween(mNA, gl, na.rm = FALSE), demeanlist(mNA, gl, means = TRUE), tolerance = tol) expect_equal(fhdbetween(mNA, gl), demeanlist(mNA, gl, means = TRUE, na.rm = TRUE), tolerance = tol) expect_visible(fhdbetween(mNA, gl, fill = TRUE)) expect_equal(fhdbetween(mtcars, gl), demeanlist(mtcars, gl, means = TRUE), tolerance = tol) expect_equal(fhdbetween(mtcNA, gl, na.rm = FALSE), demeanlist(mtcNA, gl, means = TRUE), tolerance = tol) expect_equal(setRownames(fhdbetween(mtcNA, gl)), demeanlist(mtcNA, gl, means = TRUE, na.rm = TRUE), tolerance = tol) expect_visible(fhdbetween(mtcNA, gl, fill = TRUE)) expect_visible(fhdbetween(mtcNA, gl, variable.wise = TRUE)) # With weights expect_equal(fhdbetween(x, fl, w), drop(x - demean(x, fl, weights = w)), tolerance = tol) expect_equal(unattrib(fhdbetween(xNA, fl, w)), drop(na_rm(xNA) - demean(xNA, fl, weights = w, na.rm = TRUE)), tolerance = tol) expect_visible(fhdbetween(xNA, fl, w, fill = TRUE)) expect_equal(fhdbetween(m, gl, wdat), m - demean(m, gl, weights = wdat), tolerance = tol) expect_equal(fhdbetween(mNA, gl, wdat, na.rm = FALSE), demeanlist(mNA, gl, weights = wdat, means = TRUE), tolerance = tol) expect_equal(unattrib(fhdbetween(mNA, gl, wdat)), unattrib(na_omit(mNA) - demean(mNA, gl, weights = wdat, na.rm = TRUE)), tolerance = tol) expect_visible(fhdbetween(mNA, gl, wdat, fill = TRUE)) # This one is a bug in demean and will be fixed soon... expect_equal(fhdbetween(mtcars, gl, wdat), mtcars %c-% demean(mtcars, gl, weights = wdat), tolerance = tol) expect_equal(fhdbetween(mtcNA, gl, na.rm = FALSE), demeanlist(mtcNA, gl, weights = wdat, means = TRUE), tolerance = tol) # Same here expect_equal(unattrib(fhdbetween(mtcNA, gl, wdat)), unattrib(na_omit(mtcNA) %c-% demean(mtcNA, gl, weights = wdat, na.rm = TRUE)), tolerance = tol) expect_visible(fhdbetween(mtcNA, gl, wdat, fill = TRUE)) expect_visible(fhdbetween(mtcNA, gl, wdat, variable.wise = TRUE)) }) test_that("fhdwithin with two factors performs like demean", { expect_equal(fhdwithin(x, fl), drop(demean(x, fl)), tolerance = tol) expect_equal(unattrib(fhdwithin(xNA, fl)), unattrib(demean(xNA, fl, na.rm = TRUE)), tolerance = tol) expect_identical(length(fhdwithin(xNA, fl, fill = TRUE)), length(xNA)) expect_equal(unattrib(fhdwithin(m, gl)), unattrib(demean(m, gl)), tolerance = tol) # expect_equal(fhdwithin(mNA, gl, na.rm = FALSE), demean(mNA, gl), tolerance = tol) # can break R expect_equal(unattrib(fhdwithin(mNA, gl)), unattrib(demean(mNA, gl, na.rm = TRUE)), tolerance = tol) expect_identical(nrow(fhdwithin(mNA, gl, fill = TRUE)), nrow(mNA)) expect_equal(unattrib(fhdwithin(mtcars, gl)), unattrib(demean(mtcars, gl)), tolerance = tol) # expect_equal(fhdwithin(mtcNA, gl, na.rm = FALSE), demean(mtcNA, gl), tolerance = tol) # can break R expect_equal(unattrib(fhdwithin(mtcNA, gl)), unattrib(demean(mtcNA, gl, na.rm = TRUE)), tolerance = tol) expect_equal(fnrow(fhdwithin(mtcNA, gl, fill = TRUE)), fnrow(mtcNA)) expect_identical(fnrow(fhdwithin(mtcNA, gl, variable.wise = TRUE)), fnrow(mtcNA)) # With weights expect_equal(fhdwithin(x, fl, w), drop(demean(x, fl, weights = w)), tolerance = tol) expect_equal(unattrib(fhdwithin(xNA, fl, w)), unattrib(demean(xNA, fl, weights = w, na.rm = TRUE)), tolerance = tol) expect_identical(length(fhdwithin(xNA, fl, w, fill = TRUE)), length(xNA)) expect_equal(unattrib(fhdwithin(m, gl, wdat)), unattrib(demean(m, gl, weights = wdat)), tolerance = tol) # expect_equal(fhdwithin(mNA, gl, wdat, na.rm = FALSE), demean(mNA, gl, weights = wdat), tolerance = tol) # can break R cc <- complete.cases(mNA) expect_equal(unattrib(fhdwithin(mNA, gl, wdat)), unattrib(demean(mNA[cc, ], lapply(gl, .subset, cc), weights = wdat[cc])), tolerance = tol) expect_identical(nrow(fhdwithin(mNA, gl, wdat, fill = TRUE)), nrow(mNA)) # Smae here, bug to be fixed in demean() expect_equal(unattrib(fhdwithin(mtcars, gl, wdat)), unattrib(demean(mtcars, gl, weights = wdat)), tolerance = tol) # expect_equal(fhdwithin(mtcNA, gl, wdat, na.rm = FALSE), demean(mtcNA, gl, weights = wdat), tolerance = tol) # can break R # Also bug expect_equal(unattrib(fhdwithin(mtcNA, gl, wdat)), unattrib(demean(mtcNA, gl, weights = wdat, na.rm = TRUE)), tolerance = 1e-3) expect_equal(fnrow(fhdwithin(mtcNA, gl, wdat, fill = TRUE)), fnrow(mtcNA)) expect_identical(fnrow(fhdwithin(mtcNA, gl, wdat, variable.wise = TRUE)), fnrow(mtcNA)) }) x2 <- 3 * x + rnorm(100) test_that("fhdbetween with only continuous variables performs like basefitted (defined above)", { expect_equal(fhdbetween(x, x2), basefitted(x, x2), tolerance = tol) expect_equal(`attr<-`(fhdbetween(xNA, x2), "na.rm", NULL), basefitted(xNA, x2, na.rm = TRUE), tolerance = tol) expect_visible(fhdbetween(xNA, x2, fill = TRUE)) expect_equal(fhdbetween(m, m), fhdbetween(m, mtcars), tolerance = tol) expect_equal(fhdbetween(m, m), basefitted(m, m), tolerance = tol) expect_equal(`attr<-`(fhdbetween(mNA, m, lm.method = "qr"), "na.rm", NULL), basefitted(mNA, m, na.rm = TRUE), tolerance = tol) expect_equal(fhdbetween(mNA, m, fill = TRUE, lm.method = "qr"), fhdbetween(mNA, mtcars, fill = TRUE, lm.method = "qr"), tolerance = tol) expect_equal(fhdbetween(mtcars, mtcars), fhdbetween(mtcars, m), tolerance = tol) expect_equal(fhdbetween(mtcars, mtcars), qDF(basefitted(mtcars, mtcars)), tolerance = tol) expect_equal(`attr<-`(fhdbetween(mtcNA, mtcars, lm.method = "qr"), "na.rm", NULL), qDF(basefitted(mtcNA, mtcars, na.rm = TRUE)), tolerance = tol) expect_equal(fhdbetween(mtcNA, mtcars, fill = TRUE, lm.method = "qr"), fhdbetween(mtcNA, m, fill = TRUE, lm.method = "qr"), tolerance = tol) expect_equal(fhdbetween(mtcNA, mtcars, variable.wise = TRUE), fhdbetween(mtcNA, m, variable.wise = TRUE), tolerance = tol) }) test_that("fhdwithin with only continuous variables performs like baseresid (defined above)", { expect_equal(fhdwithin(x, x2), baseresid(x, x2), tolerance = tol) expect_equal(`attr<-`(fhdwithin(xNA, x2), "na.rm", NULL), baseresid(xNA, x2, na.rm = TRUE), tolerance = tol) expect_visible(fhdwithin(xNA, x2, fill = TRUE)) expect_equal(fhdwithin(m, m), fhdwithin(m, mtcars), tolerance = tol) expect_equal(fhdwithin(m, m), baseresid(m, m), tolerance = tol) expect_equal(`attr<-`(fhdwithin(mNA, m, lm.method = "qr"), "na.rm", NULL), baseresid(mNA, m, na.rm = TRUE), tolerance = tol) expect_equal(fhdwithin(mNA, m, fill = TRUE, lm.method = "qr"), fhdwithin(mNA, mtcars, fill = TRUE, lm.method = "qr"), tolerance = tol) expect_equal(fhdwithin(mtcars, mtcars), fhdwithin(mtcars, m), tolerance = tol) expect_equal(fhdwithin(mtcars, mtcars), qDF(baseresid(mtcars, mtcars)), tolerance = tol) expect_equal(`attr<-`(fhdwithin(mtcNA, mtcars, lm.method = "qr"), "na.rm", NULL), qDF(baseresid(mtcNA, mtcars, na.rm = TRUE)), tolerance = tol) expect_equal(fhdwithin(mtcNA, mtcars, fill = TRUE, lm.method = "qr"), fhdwithin(mtcNA, m, fill = TRUE, lm.method = "qr"), tolerance = tol) expect_equal(fhdwithin(mtcNA, mtcars, variable.wise = TRUE), fhdwithin(mtcNA, m, variable.wise = TRUE), tolerance = tol) }) data <- wlddev data$year <- qF(data$year) data <- get_vars(data, c("iso3c","year","region","income","PCGDP","LIFEEX","ODA")) ww <- abs(rnorm(fnrow(data))) wi <- abs(rnorm(fnrow(iris))) test_that("fhdbetween with multiple variables performs like lm", { expect_equal(fhdbetween(iris$Sepal.Length, iris[-1]), `names<-`(fitted(lm(Sepal.Length ~., iris)), NULL), tolerance = tol) expect_equal(fhdbetween(iris[1], iris[-1])[[1]], `names<-`(fitted(lm(Sepal.Length ~., iris)), NULL), tolerance = tol) expect_equal(setRownames(qM(fhdbetween(iris[1:2], iris[-(1:2)]))), fitted(lm(cbind(Sepal.Length, Sepal.Width) ~., iris)), tolerance = tol) expect_equal(`attributes<-`(fhdbetween(data$PCGDP, data[-5]), NULL), `attributes<-`(fitted(lm(PCGDP ~., data)), NULL), tolerance = tol) expect_visible(fhdbetween(data$PCGDP, data[-5], fill = TRUE)) expect_equal(`attributes<-`(fhdbetween(data[5], data[-5])[[1]], NULL), `attributes<-`(fitted(lm(PCGDP ~., data)), NULL), tolerance = tol) expect_visible(fhdbetween(data[5], data[-5], fill = TRUE)) expect_equal(setRownames(qM(fhdbetween(data[5:6], data[-(5:6)]))), setRownames(fitted(lm(cbind(PCGDP, LIFEEX) ~., data))), tolerance = tol) expect_visible(fhdbetween(data[5:6], data[-(5:6)], fill = TRUE)) expect_visible(fhdbetween(data[5:6], data[-(5:6)], variable.wise = TRUE)) expect_equal(setRownames(qM(fhdbetween(data[5:7], data[-(5:7)]))), setRownames(fitted(lm(cbind(PCGDP, LIFEEX, ODA) ~., data))), tolerance = tol) expect_visible(fhdbetween(data[5:7], data[-(5:7)], fill = TRUE)) expect_visible(fhdbetween(data[5:7], data[-(5:7)], variable.wise = TRUE)) expect_equal(setRownames(qM(fhdbetween(data[5:6], data$ODA))), setRownames(fitted(lm(cbind(PCGDP, LIFEEX) ~., data[5:7]))), tolerance = tol) expect_equal(fhdbetween(data[5:6], data[7], fill = TRUE), fhdbetween(data[5:6], data$ODA, fill = TRUE), tolerance = tol) expect_equal(fhdbetween(data[5:6], data[7], variable.wise = TRUE), fhdbetween(data[5:6], data$ODA, variable.wise = TRUE), tolerance = tol) # With weights expect_equal(fhdbetween(iris$Sepal.Length, iris[-1], wi), `names<-`(fitted(lm(Sepal.Length ~., iris, weights = wi)), NULL), tolerance = tol) expect_equal(fhdbetween(iris[1], iris[-1], wi)[[1]], `names<-`(fitted(lm(Sepal.Length ~., iris, weights = wi)), NULL), tolerance = tol) expect_equal(setRownames(qM(fhdbetween(iris[1:2], iris[-(1:2)], wi))), fitted(lm(cbind(Sepal.Length, Sepal.Width) ~., iris, weights = wi)), tolerance = tol) expect_equal(`attributes<-`(fhdbetween(data$PCGDP, data[-5], ww), NULL), `attributes<-`(fitted(lm(PCGDP ~., data, weights = ww)), NULL), tolerance = tol) expect_visible(fhdbetween(data$PCGDP, data[-5], ww, fill = TRUE)) expect_equal(`attributes<-`(fhdbetween(data[5], data[-5], ww)[[1]], NULL), `attributes<-`(fitted(lm(PCGDP ~., data, weights = ww)), NULL), tolerance = tol) expect_visible(fhdbetween(data[5], data[-5], ww, fill = TRUE)) expect_equal(setRownames(qM(fhdbetween(data[5:6], data[-(5:6)], ww))), setRownames(fitted(lm(cbind(PCGDP, LIFEEX) ~., data, weights = ww))), tolerance = tol) expect_visible(fhdbetween(data[5:6], data[-(5:6)], ww, fill = TRUE)) expect_visible(fhdbetween(data[5:6], data[-(5:6)], ww, variable.wise = TRUE)) expect_equal(setRownames(qM(fhdbetween(data[5:7], data[-(5:7)], ww))), setRownames(fitted(lm(cbind(PCGDP, LIFEEX, ODA) ~., data, weights = ww))), tolerance = tol) expect_visible(fhdbetween(data[5:7], data[-(5:7)], ww, fill = TRUE)) expect_visible(fhdbetween(data[5:7], data[-(5:7)], ww, variable.wise = TRUE)) expect_equal(setRownames(qM(fhdbetween(data[5:6], data$ODA, ww))), setRownames(fitted(lm(cbind(PCGDP, LIFEEX) ~., data[5:7], weights = ww))), tolerance = tol) expect_equal(fhdbetween(data[5:6], data[7], ww, fill = TRUE), fhdbetween(data[5:6], data$ODA, ww, fill = TRUE), tolerance = tol) expect_equal(fhdbetween(data[5:6], data[7], ww, variable.wise = TRUE), fhdbetween(data[5:6], data$ODA, ww, variable.wise = TRUE), tolerance = tol) }) test_that("fhdwithin with multiple variables performs like lm", { expect_equal(fhdwithin(iris$Sepal.Length, iris[-1]), `names<-`(resid(lm(Sepal.Length ~., iris)), NULL), tolerance = tol) expect_equal(fhdwithin(iris[1], iris[-1])[[1]], `names<-`(resid(lm(Sepal.Length ~., iris)), NULL), tolerance = tol) expect_equal(setRownames(qM(fhdwithin(iris[1:2], iris[-(1:2)]))), resid(lm(cbind(Sepal.Length, Sepal.Width) ~., iris)), tolerance = tol) expect_equal(`attributes<-`(fhdwithin(data$PCGDP, data[-5]), NULL), `attributes<-`(resid(lm(PCGDP ~., data)), NULL), tolerance = tol) expect_visible(fhdwithin(data$PCGDP, data[-5], fill = TRUE)) expect_equal(`attributes<-`(fhdwithin(data[5], data[-5])[[1]], NULL), `attributes<-`(resid(lm(PCGDP ~., data)), NULL), tolerance = tol) expect_visible(fhdwithin(data[5], data[-5], fill = TRUE)) expect_equal(setRownames(qM(fhdwithin(data[5:6], data[-(5:6)]))), setRownames(resid(lm(cbind(PCGDP, LIFEEX) ~., data))), tolerance = tol) expect_visible(fhdwithin(data[5:6], data[-(5:6)], fill = TRUE)) expect_visible(fhdwithin(data[5:6], data[-(5:6)], variable.wise = TRUE)) expect_equal(setRownames(qM(fhdwithin(data[5:7], data[-(5:7)]))), setRownames(resid(lm(cbind(PCGDP, LIFEEX, ODA) ~., data))), tolerance = tol) expect_visible(fhdwithin(data[5:7], data[-(5:7)], fill = TRUE)) expect_visible(fhdwithin(data[5:7], data[-(5:7)], variable.wise = TRUE)) expect_equal(setRownames(qM(fhdwithin(data[5:6], data$ODA))), setRownames(resid(lm(cbind(PCGDP, LIFEEX) ~., data[5:7]))), tolerance = tol) expect_equal(fhdwithin(data[5:6], data[7], fill = TRUE), fhdwithin(data[5:6], data$ODA, fill = TRUE), tolerance = tol) expect_equal(fhdwithin(data[5:6], data[7], variable.wise = TRUE), fhdwithin(data[5:6], data$ODA, variable.wise = TRUE), tolerance = tol) # With weights expect_equal(fhdwithin(iris$Sepal.Length, iris[-1], wi), `names<-`(resid(lm(Sepal.Length ~., iris, weights = wi)), NULL), tolerance = tol) expect_equal(fhdwithin(iris[1], iris[-1], wi)[[1]], `names<-`(resid(lm(Sepal.Length ~., iris, weights = wi)), NULL), tolerance = tol) expect_equal(setRownames(qM(fhdwithin(iris[1:2], iris[-(1:2)], wi))), resid(lm(cbind(Sepal.Length, Sepal.Width) ~., iris, weights = wi)), tolerance = tol) expect_equal(`attributes<-`(fhdwithin(data$PCGDP, data[-5], ww), NULL), `attributes<-`(resid(lm(PCGDP ~., data, weights = ww)), NULL), tolerance = tol) expect_visible(fhdwithin(data$PCGDP, data[-5], ww, fill = TRUE)) expect_equal(`attributes<-`(fhdwithin(data[5], data[-5], ww)[[1]], NULL), `attributes<-`(resid(lm(PCGDP ~., data, weights = ww)), NULL), tolerance = tol) expect_visible(fhdwithin(data[5], data[-5], ww, fill = TRUE)) expect_equal(setRownames(qM(fhdwithin(data[5:6], data[-(5:6)], ww))), setRownames(resid(lm(cbind(PCGDP, LIFEEX) ~., data, weights = ww))), tolerance = tol) expect_visible(fhdwithin(data[5:6], data[-(5:6)], ww, fill = TRUE)) expect_visible(fhdwithin(data[5:6], data[-(5:6)], ww, variable.wise = TRUE)) expect_equal(setRownames(qM(fhdwithin(data[5:7], data[-(5:7)], ww))), setRownames(resid(lm(cbind(PCGDP, LIFEEX, ODA) ~., data, weights = ww))), tolerance = tol) expect_visible(fhdwithin(data[5:7], data[-(5:7)], ww, fill = TRUE)) expect_visible(fhdwithin(data[5:7], data[-(5:7)], ww, variable.wise = TRUE)) expect_equal(setRownames(qM(fhdwithin(data[5:6], data$ODA, ww))), setRownames(resid(lm(cbind(PCGDP, LIFEEX) ~., data[5:7], weights = ww))), tolerance = tol) expect_equal(fhdwithin(data[5:6], data[7], ww, fill = TRUE), fhdwithin(data[5:6], data$ODA, ww, fill = TRUE), tolerance = tol) expect_equal(fhdwithin(data[5:6], data[7], ww, variable.wise = TRUE), fhdwithin(data[5:6], data$ODA, ww, variable.wise = TRUE), tolerance = tol) }) test_that("fhdbetween produces errors for wrong input", { expect_visible(fhdbetween(1:2,1:2)) expect_error(fhdbetween("a", 1)) expect_error(fhdbetween(mNAc, f)) expect_error(fhdbetween(1:2,1:3)) expect_error(fhdbetween(m,1:31)) expect_error(fhdbetween(mNA,1:31)) expect_error(fhdbetween(mtcars,1:31)) # expect_warning(fhdbetween(1:2, 1:2, bla = 1)) expect_error(fhdbetween(wlddev, list(wlddev$iso3c, wlddev$income[1:10000]))) expect_visible(fhdbetween(1:2,1:2, na.rm = FALSE)) expect_error(fhdbetween("a", 1, na.rm = FALSE)) expect_error(fhdbetween(mNAc, f, na.rm = FALSE)) expect_error(fhdbetween(1:2,1:3, na.rm = FALSE)) expect_error(fhdbetween(m,1:31, na.rm = FALSE)) expect_error(fhdbetween(mNA,1:31, na.rm = FALSE)) expect_error(fhdbetween(mtcars,1:31, na.rm = FALSE)) # expect_warning(fhdbetween(1:2, 1:2, bla = 1, na.rm = FALSE)) # expect_error(fhdbetween(wlddev, list(wlddev$iso3c, wlddev$income[1:10000]), na.rm = FALSE)) # breaks R }) test_that("fhdwithin produces errors for wrong input", { expect_visible(fhdwithin(1:2,1:2)) expect_error(fhdwithin("a", 1)) expect_error(fhdwithin(mNAc, f)) expect_error(fhdwithin(1:2,1:3)) expect_error(fhdwithin(m,1:31)) expect_error(fhdwithin(mNA,1:31)) expect_error(fhdwithin(mtcars,1:31)) # expect_warning(fhdwithin(1:2, 1:2, bla = 1)) expect_error(fhdwithin(wlddev, list(wlddev$iso3c, wlddev$income[1:10000]))) expect_visible(fhdwithin(1:2,1:2, na.rm = FALSE)) expect_error(fhdwithin("a", 1, na.rm = FALSE)) expect_error(fhdwithin(mNAc, f, na.rm = FALSE)) expect_error(fhdwithin(1:2,1:3, na.rm = FALSE)) expect_error(fhdwithin(m,1:31, na.rm = FALSE)) expect_error(fhdwithin(mNA,1:31, na.rm = FALSE)) expect_error(fhdwithin(mtcars,1:31, na.rm = FALSE)) # expect_warning(fhdwithin(1:2, 1:2, bla = 1, na.rm = FALSE)) # expect_error(fhdwithin(wlddev, list(wlddev$iso3c, wlddev$income[1:10000]), na.rm = FALSE)) # segfault !!! }) if(identical(Sys.getenv("NCRAN"), "TRUE")) { # HDB and HDW test_that("HDW data.frame method (formula input) performs properly", { # simple lm, continuous vars expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, ~ carb + gear + wt, stub = FALSE)))[2:3], coef(lm(mpg ~ hp + disp + carb + gear + wt, mtcars))[2:3], tolerance = tol) # continuous interaction expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, ~ carb*gear + wt, stub = FALSE)))[2:3], coef(lm(mpg ~ hp + disp + carb*gear + wt, mtcars))[2:3], tolerance = tol) # continuous 3-way interaction expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, ~ carb*gear*wt, stub = FALSE)))[2:3], coef(lm(mpg ~ hp + disp + carb*gear*wt, mtcars))[2:3], tolerance = tol) # factor expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, ~ qF(cyl), stub = FALSE)))[2:3], coef(lm(mpg ~ hp + disp + qF(cyl), mtcars))[2:3], tolerance = tol) # factor - continuous without including factor expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, ~ qF(cyl):carb, stub = FALSE)))[2:3], coef(lm(mpg ~ hp + disp + qF(cyl):carb, mtcars))[2:3], tolerance = tol) # multiple factors - continuous without including factor expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, ~ qF(cyl):carb + qF(vs):carb, stub = FALSE)))[2:3], coef(lm(mpg ~ hp + disp + qF(cyl):carb + qF(vs):carb, mtcars))[2:3], tolerance = tol) # multiple factors - continuous without including factor 2 expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, ~ qF(cyl):carb + qF(vs):wt, stub = FALSE)))[2:3], coef(lm(mpg ~ hp + disp + qF(cyl):carb + qF(vs):wt, mtcars))[2:3], tolerance = tol) # multiple factors - continuous without including factor 3 expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, ~ am + qF(cyl):carb + qF(vs):wt, stub = FALSE)))[2:3], coef(lm(mpg ~ hp + disp + am + qF(cyl):carb + qF(vs):wt, mtcars))[2:3], tolerance = tol) # factor - continuous including factor expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, ~ qF(cyl):carb + qF(cyl), stub = FALSE)))[2:3], coef(lm(mpg ~ hp + disp + qF(cyl):carb + qF(cyl), mtcars))[2:3], tolerance = tol) # factor - continuous full interaction expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, ~ qF(cyl)*carb, stub = FALSE, lm.method = "qr")))[2:3], coef(lm(mpg ~ hp + disp + qF(cyl)*carb, mtcars))[2:3], tolerance = tol) # HD fixed effects expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, ~ factor(cyl) + factor(vs) + factor(am), stub = FALSE)))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl) + factor(vs) + factor(am), mtcars))[2:3], tolerance = tol) # HD fixed effects + factor interaction expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, ~ factor(cyl):factor(vs) + factor(am), stub = FALSE)))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl):factor(vs) + factor(am), mtcars))[2:3], tolerance = tol) # 3 way factor interaction expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, ~ factor(cyl):factor(vs):factor(am), stub = FALSE)))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl):factor(vs):factor(am), mtcars))[2:3], tolerance = tol) expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, ~ factor(cyl):factor(vs):factor(am), stub = FALSE)))[2:3], coef(lm(mpg ~ hp + disp , W(mtcars, ~ cyl + vs + am, stub = FALSE)))[2:3], tolerance = tol) # HD fixed effects and continuous variable expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, ~ factor(cyl) + factor(vs) + factor(am) + carb + gear + wt, stub = FALSE)))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl) + factor(vs) + factor(am) + carb + gear + wt, mtcars))[2:3], tolerance = tol) # HD fixed effects and continuous variables and factor-continuous interactions expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, ~ factor(cyl) + factor(vs):gear + factor(am):carb + wt, stub = FALSE)))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl) + factor(vs):gear + factor(am):carb + wt, mtcars))[2:3], tolerance = tol) # HD fixed effects and continuous variables and full interactions expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, ~ factor(cyl) + factor(vs)*gear + factor(am):carb + wt, stub = FALSE)))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl) + factor(vs)*gear + factor(am):carb + wt, mtcars))[2:3], tolerance = tol) # HD fixed effects and continuous variables and factor-continuous interactions + factor interactions expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, ~ factor(cyl):factor(vs) + factor(am):carb + wt, stub = FALSE)))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl):factor(vs) + factor(am):carb + wt, mtcars))[2:3], tolerance = tol) # HD fixed effects and continuous variables and polynomaial interactions expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, ~ factor(cyl) + factor(vs):poly(gear,2) + factor(am):carb + wt, stub = FALSE)))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl) + factor(vs):poly(gear,2) + factor(am):carb + wt, mtcars))[2:3], tolerance = tol) # 3-way interaction continuous-factor expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, ~ factor(cyl):vs:gear + factor(am):carb + wt, stub = FALSE, lm.method = "qr")))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl):vs:gear + factor(am):carb + wt, mtcars))[2:3]) # 3-way interaction factor-continuous expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, ~ factor(cyl):factor(vs):gear + factor(am):carb + wt, stub = FALSE)))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl):factor(vs):gear + factor(am):carb + wt, mtcars))[2:3]) # With weights # simple lm, continuous vars expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, ~ carb + gear + wt, wdat, stub = FALSE), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp + carb + gear + wt, mtcars, weights = wdat))[2:3], tolerance = tol) # continuous interaction expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, ~ carb*gear + wt, wdat, stub = FALSE), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp + carb*gear + wt, mtcars, weights = wdat))[2:3], tolerance = tol) # continuous 3-way interaction expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, ~ carb*gear*wt, wdat, stub = FALSE), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp + carb*gear*wt, mtcars, weights = wdat))[2:3], tolerance = tol) # factor expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, ~ qF(cyl), wdat, stub = FALSE), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp + qF(cyl), mtcars, weights = wdat))[2:3], tolerance = tol) # factor - continuous without including factor expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, ~ qF(cyl):carb, wdat, stub = FALSE), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp + qF(cyl):carb, mtcars, weights = wdat))[2:3], tolerance = tol) # factor - continuous including factor expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, ~ qF(cyl):carb + qF(cyl), wdat, stub = FALSE), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp + qF(cyl):carb + qF(cyl), mtcars, weights = wdat))[2:3], tolerance = tol) # factor - continuous full interaction if(failtests) expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, ~ qF(cyl)*carb, wdat, stub = FALSE, lm.method = "qr"), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp + qF(cyl)*carb, mtcars, weights = wdat))[2:3], tolerance = tol) # HD fixed effects expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, ~ factor(cyl) + factor(vs) + factor(am), wdat, stub = FALSE), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl) + factor(vs) + factor(am), mtcars, weights = wdat))[2:3], tolerance = tol) # HD fixed effects + factor interaction expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, ~ factor(cyl):factor(vs) + factor(am), wdat, stub = FALSE), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl):factor(vs) + factor(am), mtcars, weights = wdat))[2:3], tolerance = tol) # 3 way factor interaction expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, ~ factor(cyl):factor(vs):factor(am), wdat, stub = FALSE), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl):factor(vs):factor(am), mtcars, weights = wdat))[2:3], tolerance = tol) expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, ~ factor(cyl):factor(vs):factor(am), wdat, stub = FALSE), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp , W(mtcars, ~ cyl + vs + am, wdat, stub = FALSE), weights = wdat))[2:3], tolerance = tol) # HD fixed effects and continuous variable expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, ~ factor(cyl) + factor(vs) + factor(am) + carb + gear + wt, wdat, stub = FALSE), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl) + factor(vs) + factor(am) + carb + gear + wt, mtcars, weights = wdat))[2:3], tolerance = tol) # HD fixed effects and continuous variables and factor-continuous interactions expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, ~ factor(cyl) + factor(vs):gear + factor(am):carb + wt, wdat, stub = FALSE), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl) + factor(vs):gear + factor(am):carb + wt, mtcars, weights = wdat))[2:3], tolerance = tol) # HD fixed effects and continuous variables and full interactions expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, ~ factor(cyl) + factor(vs)*gear + factor(am):carb + wt, wdat, stub = FALSE, lm.method = "qr"), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl) + factor(vs)*gear + factor(am):carb + wt, mtcars, weights = wdat))[2:3], tolerance = tol) # HD fixed effects and continuous variables and factor-continuous interactions + factor interactions expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, ~ factor(cyl):factor(vs) + factor(am):carb + wt, wdat, stub = FALSE), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl):factor(vs) + factor(am):carb + wt, mtcars, weights = wdat))[2:3], tolerance = tol) # HD fixed effects and continuous variables and polynomaial interactions expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, ~ factor(cyl) + factor(vs):poly(gear,2) + factor(am):carb + wt, wdat, stub = FALSE), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl) + factor(vs):poly(gear,2) + factor(am):carb + wt, mtcars, weights = wdat))[2:3], tolerance = tol) # 3-way interaction continuous-factor expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, ~ factor(cyl):vs:gear + factor(am):carb + wt, wdat, stub = FALSE, lm.method = "qr"), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl):vs:gear + factor(am):carb + wt, mtcars, weights = wdat))[2:3]) # 3-way interaction factor-continuous expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, ~ factor(cyl):factor(vs):gear + factor(am):carb + wt, wdat, stub = FALSE), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl):factor(vs):gear + factor(am):carb + wt, mtcars, weights = wdat))[2:3]) }) test_that("HDW data.frame method (formula input) with 2-sided formula performs properly", { # simple lm, continuous vars expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, mpg + hp + disp ~ carb + gear + wt, stub = FALSE)))[2:3], coef(lm(mpg ~ hp + disp + carb + gear + wt, mtcars))[2:3], tolerance = tol) # continuous interaction expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, mpg + hp + disp ~ carb*gear + wt, stub = FALSE)))[2:3], coef(lm(mpg ~ hp + disp + carb*gear + wt, mtcars))[2:3], tolerance = tol) # continuous 3-way interaction expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, mpg + hp + disp ~ carb*gear*wt, stub = FALSE, lm.method = "qr")))[2:3], coef(lm(mpg ~ hp + disp + carb*gear*wt, mtcars))[2:3], tolerance = tol) # factor expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, mpg + hp + disp ~ qF(cyl), stub = FALSE)))[2:3], coef(lm(mpg ~ hp + disp + qF(cyl), mtcars))[2:3], tolerance = tol) # factor - continuous without including factor expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, mpg + hp + disp ~ qF(cyl):carb, stub = FALSE)))[2:3], coef(lm(mpg ~ hp + disp + qF(cyl):carb, mtcars))[2:3], tolerance = tol) # factor - continuous including factor expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, mpg + hp + disp ~ qF(cyl):carb + qF(cyl), stub = FALSE)))[2:3], coef(lm(mpg ~ hp + disp + qF(cyl):carb + qF(cyl), mtcars))[2:3], tolerance = tol) # factor - continuous full interaction expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, mpg + hp + disp ~ qF(cyl)*carb, stub = FALSE, lm.method = "qr")))[2:3], coef(lm(mpg ~ hp + disp + qF(cyl)*carb, mtcars))[2:3], tolerance = tol) # HD fixed effects expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, mpg + hp + disp ~ factor(cyl) + factor(vs) + factor(am), stub = FALSE)))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl) + factor(vs) + factor(am), mtcars))[2:3], tolerance = tol) # HD fixed effects + factor interaction expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, mpg + hp + disp ~ factor(cyl):factor(vs) + factor(am), stub = FALSE)))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl):factor(vs) + factor(am), mtcars))[2:3], tolerance = tol) # 3 way factor interaction expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, mpg + hp + disp ~ factor(cyl):factor(vs):factor(am), stub = FALSE)))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl):factor(vs):factor(am), mtcars))[2:3], tolerance = tol) expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, mpg + hp + disp ~ factor(cyl):factor(vs):factor(am), stub = FALSE)))[2:3], coef(lm(mpg ~ hp + disp , W(mtcars, ~ cyl + vs + am, stub = FALSE)))[2:3], tolerance = tol) # HD fixed effects and continuous variable expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, mpg + hp + disp ~ factor(cyl) + factor(vs) + factor(am) + carb + gear + wt, stub = FALSE)))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl) + factor(vs) + factor(am) + carb + gear + wt, mtcars))[2:3], tolerance = tol) # HD fixed effects and continuous variables and factor-continuous interactions expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, mpg + hp + disp ~ factor(cyl) + factor(vs):gear + factor(am):carb + wt, stub = FALSE)))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl) + factor(vs):gear + factor(am):carb + wt, mtcars))[2:3], tolerance = tol) # HD fixed effects and continuous variables and full interactions expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, mpg + hp + disp ~ factor(cyl) + factor(vs)*gear + factor(am):carb + wt, stub = FALSE)))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl) + factor(vs)*gear + factor(am):carb + wt, mtcars))[2:3], tolerance = tol) # HD fixed effects and continuous variables and factor-continuous interactions + factor interactions expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, mpg + hp + disp ~ factor(cyl):factor(vs) + factor(am):carb + wt, stub = FALSE)))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl):factor(vs) + factor(am):carb + wt, mtcars))[2:3], tolerance = tol) # HD fixed effects and continuous variables and polynomaial interactions expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, mpg + hp + disp ~ factor(cyl) + factor(vs):poly(gear,2) + factor(am):carb + wt, stub = FALSE)))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl) + factor(vs):poly(gear,2) + factor(am):carb + wt, mtcars))[2:3], tolerance = tol) # 3-way interaction continuous-factor expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, ~ factor(cyl):vs:gear + factor(am):carb + wt, stub = FALSE, lm.method = "qr")))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl):vs:gear + factor(am):carb + wt, mtcars))[2:3]) # 3-way interaction factor-continuous expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, ~ factor(cyl):factor(vs):gear + factor(am):carb + wt, stub = FALSE)))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl):factor(vs):gear + factor(am):carb + wt, mtcars))[2:3]) # With weights # simple lm, continuous vars expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, mpg + hp + disp ~ carb + gear + wt, wdat, stub = FALSE), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp + carb + gear + wt, mtcars, weights = wdat))[2:3], tolerance = tol) # continuous interaction expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, mpg + hp + disp ~ carb*gear + wt, wdat, stub = FALSE), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp + carb*gear + wt, mtcars, weights = wdat))[2:3], tolerance = tol) # continuous 3-way interaction expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, mpg + hp + disp ~ carb*gear*wt, wdat, stub = FALSE), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp + carb*gear*wt, mtcars, weights = wdat))[2:3], tolerance = tol) # factor expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, mpg + hp + disp ~ qF(cyl), wdat, stub = FALSE), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp + qF(cyl), mtcars, weights = wdat))[2:3], tolerance = tol) # factor - continuous without including factor expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, mpg + hp + disp ~ qF(cyl):carb, wdat, stub = FALSE), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp + qF(cyl):carb, mtcars, weights = wdat))[2:3], tolerance = tol) # factor - continuous including factor expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, mpg + hp + disp ~ qF(cyl):carb + qF(cyl), wdat, stub = FALSE), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp + qF(cyl):carb + qF(cyl), mtcars, weights = wdat))[2:3], tolerance = tol) # factor - continuous full interaction if(failtests) expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, mpg + hp + disp ~ qF(cyl)*carb, wdat, stub = FALSE, lm.method = "qr"), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp + qF(cyl)*carb, mtcars, weights = wdat))[2:3], tolerance = tol) # HD fixed effects expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, mpg + hp + disp ~ factor(cyl) + factor(vs) + factor(am), wdat, stub = FALSE), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl) + factor(vs) + factor(am), mtcars, weights = wdat))[2:3], tolerance = tol) # HD fixed effects + factor interaction expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, mpg + hp + disp ~ factor(cyl):factor(vs) + factor(am), wdat, stub = FALSE), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl):factor(vs) + factor(am), mtcars, weights = wdat))[2:3], tolerance = tol) # 3 way factor interaction expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, mpg + hp + disp ~ factor(cyl):factor(vs):factor(am), wdat, stub = FALSE), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl):factor(vs):factor(am), mtcars, weights = wdat))[2:3], tolerance = tol) expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, mpg + hp + disp ~ factor(cyl):factor(vs):factor(am), wdat, stub = FALSE), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp , W(mtcars, mpg + hp + disp ~ cyl + vs + am, wdat, stub = FALSE), weights = wdat))[2:3], tolerance = tol) # HD fixed effects and continuous variable expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, mpg + hp + disp ~ factor(cyl) + factor(vs) + factor(am) + carb + gear + wt, wdat, stub = FALSE), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl) + factor(vs) + factor(am) + carb + gear + wt, mtcars, weights = wdat))[2:3], tolerance = tol) # HD fixed effects and continuous variables and factor-continuous interactions expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, mpg + hp + disp ~ factor(cyl) + factor(vs):gear + factor(am):carb + wt, wdat, stub = FALSE), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl) + factor(vs):gear + factor(am):carb + wt, mtcars, weights = wdat))[2:3], tolerance = tol) # HD fixed effects and continuous variables and full interactions expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, mpg + hp + disp ~ factor(cyl) + factor(vs)*gear + factor(am):carb + wt, wdat, stub = FALSE, lm.method = "qr"), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl) + factor(vs)*gear + factor(am):carb + wt, mtcars, weights = wdat))[2:3], tolerance = tol) # HD fixed effects and continuous variables and factor-continuous interactions + factor interactions expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, mpg + hp + disp ~ factor(cyl):factor(vs) + factor(am):carb + wt, wdat, stub = FALSE), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl):factor(vs) + factor(am):carb + wt, mtcars, weights = wdat))[2:3], tolerance = tol) # HD fixed effects and continuous variables and polynomaial interactions expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, mpg + hp + disp ~ factor(cyl) + factor(vs):poly(gear,2) + factor(am):carb + wt, wdat, stub = FALSE), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl) + factor(vs):poly(gear,2) + factor(am):carb + wt, mtcars, weights = wdat))[2:3], tolerance = tol) # 3-way interaction continuous-factor expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, mpg + hp + disp ~ factor(cyl):vs:gear + factor(am):carb + wt, wdat, stub = FALSE, lm.method = "qr"), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl):vs:gear + factor(am):carb + wt, mtcars, weights = wdat))[2:3]) # 3-way interaction factor-continuous expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, mpg + hp + disp ~ factor(cyl):factor(vs):gear + factor(am):carb + wt, wdat, stub = FALSE), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl):factor(vs):gear + factor(am):carb + wt, mtcars, weights = wdat))[2:3]) }) test_that("HDW data.frame method (formula input) with 2-sided formula and missing values performs properly", { # simple lm, continuous vars expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcNA, mpg + hp + disp ~ carb + gear + wt, stub = FALSE)))[2:3], coef(lm(mpg ~ hp + disp + carb + gear + wt, mtcNA))[2:3], tolerance = tol) # continuous interaction expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcNA, mpg + hp + disp ~ carb*gear + wt, stub = FALSE)))[2:3], coef(lm(mpg ~ hp + disp + carb*gear + wt, mtcNA))[2:3], tolerance = tol) # continuous 3-way interaction expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcNA, mpg + hp + disp ~ carb*gear*wt, stub = FALSE)))[2:3], coef(lm(mpg ~ hp + disp + carb*gear*wt, mtcNA))[2:3], tolerance = tol) # factor expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcNA, mpg + hp + disp ~ qF(cyl), stub = FALSE)))[2:3], coef(lm(mpg ~ hp + disp + qF(cyl), mtcNA))[2:3], tolerance = tol) # factor - continuous without including factor expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcNA, mpg + hp + disp ~ qF(cyl):carb, stub = FALSE)))[2:3], coef(lm(mpg ~ hp + disp + qF(cyl):carb, mtcNA))[2:3], tolerance = tol) # factor - continuous including factor expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcNA, mpg + hp + disp ~ qF(cyl):carb + qF(cyl), stub = FALSE)))[2:3], coef(lm(mpg ~ hp + disp + qF(cyl):carb + qF(cyl), mtcNA))[2:3], tolerance = tol) # factor - continuous full interaction expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcNA, mpg + hp + disp ~ qF(cyl)*carb, stub = FALSE, lm.method = "qr")))[2:3], coef(lm(mpg ~ hp + disp + qF(cyl)*carb, mtcNA))[2:3], tolerance = tol) # HD fixed effects expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcNA, mpg + hp + disp ~ factor(cyl) + factor(vs) + factor(am), stub = FALSE)))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl) + factor(vs) + factor(am), mtcNA))[2:3], tolerance = tol) # HD fixed effects + factor interaction expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcNA, mpg + hp + disp ~ factor(cyl):factor(vs) + factor(am), stub = FALSE)))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl):factor(vs) + factor(am), mtcNA))[2:3], tolerance = tol) # 3 way factor interaction expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcNA, mpg + hp + disp ~ factor(cyl):factor(vs):factor(am), stub = FALSE)))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl):factor(vs):factor(am), mtcNA))[2:3], tolerance = tol) # HD fixed effects and continuous variable expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcNA, mpg + hp + disp ~ factor(cyl) + factor(vs) + factor(am) + carb + gear + wt, stub = FALSE)))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl) + factor(vs) + factor(am) + carb + gear + wt, mtcNA))[2:3], tolerance = tol) # HD fixed effects and continuous variables and factor-continuous interactions : Somestimes test fails, I don't know why (maybe demeanlist numeric problem) expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcNA, mpg + hp + disp ~ factor(cyl) + factor(vs):gear + factor(am):carb + wt, stub = FALSE)))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl) + factor(vs):gear + factor(am):carb + wt, mtcNA))[2:3], tolerance = tol) # HD fixed effects and continuous variables and full interactions expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcNA, mpg + hp + disp ~ factor(cyl) + factor(vs)*gear + factor(am):carb + wt, stub = FALSE)))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl) + factor(vs)*gear + factor(am):carb + wt, mtcNA))[2:3], tolerance = 1) # faile R CMD Arch i386 (32 Bit) # HD fixed effects and continuous variables and factor-continuous interactions + factor interactions expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcNA, mpg + hp + disp ~ factor(cyl):factor(vs) + factor(am):carb + wt, stub = FALSE)))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl):factor(vs) + factor(am):carb + wt, mtcNA))[2:3], tolerance = 1e-2) # 3-way interaction continuous-factor: error if(failtests) expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcNA, ~ factor(cyl):vs:gear + factor(am):carb + wt, stub = FALSE, lm.method = "qr")))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl):vs:gear + factor(am):carb + wt, mtcNA))[2:3]) # 3-way interaction factor-continuous: error if(failtests) expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcNA, ~ factor(cyl):factor(vs):gear + factor(am):carb + wt, stub = FALSE)))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl):factor(vs):gear + factor(am):carb + wt, mtcNA))[2:3]) # With weights # simple lm, continuous vars expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcNA, mpg + hp + disp ~ carb + gear + wt, wdat, stub = FALSE, fill = TRUE), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp + carb + gear + wt, mtcNA, weights = wdat))[2:3], tolerance = tol) # continuous interaction expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcNA, mpg + hp + disp ~ carb*gear + wt, wdat, stub = FALSE, fill = TRUE), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp + carb*gear + wt, mtcNA, weights = wdat))[2:3], tolerance = tol) # continuous 3-way interaction expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcNA, mpg + hp + disp ~ carb*gear*wt, wdat, stub = FALSE, fill = TRUE), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp + carb*gear*wt, mtcNA, weights = wdat))[2:3], tolerance = tol) # factor expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcNA, mpg + hp + disp ~ qF(cyl), wdat, stub = FALSE, fill = TRUE), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp + qF(cyl), mtcNA, weights = wdat))[2:3], tolerance = tol) # factor - continuous without including factor expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcNA, mpg + hp + disp ~ qF(cyl):carb, wdat, stub = FALSE, fill = TRUE), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp + qF(cyl):carb, mtcNA, weights = wdat))[2:3], tolerance = tol) # factor - continuous including factor expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcNA, mpg + hp + disp ~ qF(cyl):carb + qF(cyl), wdat, stub = FALSE, fill = TRUE), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp + qF(cyl):carb + qF(cyl), mtcNA, weights = wdat))[2:3], tolerance = tol) # factor - continuous full interaction if(failtests) expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcNA, mpg + hp + disp ~ qF(cyl)*carb, wdat, stub = FALSE, fill = TRUE), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp + qF(cyl)*carb, mtcNA, weights = wdat))[2:3], tolerance = tol) # HD fixed effects expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcNA, mpg + hp + disp ~ factor(cyl) + factor(vs) + factor(am), wdat, stub = FALSE, fill = TRUE), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl) + factor(vs) + factor(am), mtcNA, weights = wdat))[2:3], tolerance = tol) # HD fixed effects + factor interaction expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcNA, mpg + hp + disp ~ factor(cyl):factor(vs) + factor(am), wdat, stub = FALSE, fill = TRUE), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl):factor(vs) + factor(am), mtcNA, weights = wdat))[2:3], tolerance = tol) # 3 way factor interaction expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcNA, mpg + hp + disp ~ factor(cyl):factor(vs):factor(am), wdat, stub = FALSE, fill = TRUE), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl):factor(vs):factor(am), mtcNA, weights = wdat))[2:3], tolerance = tol) if(failtests) expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcNA, mpg + hp + disp ~ factor(cyl):factor(vs):factor(am), wdat, stub = FALSE, fill = TRUE), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp , W(mtcNA, mpg + hp + disp ~ cyl + vs + am, wdat, stub = FALSE), weights = wdat))[2:3], tolerance = tol) # HD fixed effects and continuous variable expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcNA, mpg + hp + disp ~ factor(cyl) + factor(vs) + factor(am) + carb + gear + wt, wdat, stub = FALSE, fill = TRUE), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl) + factor(vs) + factor(am) + carb + gear + wt, mtcNA, weights = wdat))[2:3], tolerance = tol) # HD fixed effects and continuous variables and factor-continuous interactions expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcNA, mpg + hp + disp ~ factor(cyl) + factor(vs):gear + factor(am):carb + wt, wdat, stub = FALSE, fill = TRUE), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl) + factor(vs):gear + factor(am):carb + wt, mtcNA, weights = wdat))[2:3], tolerance = tol) # HD fixed effects and continuous variables and full interactions if(failtests) expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcNA, mpg + hp + disp ~ factor(cyl) + factor(vs)*gear + factor(am):carb + wt, wdat, stub = FALSE, fill = TRUE), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl) + factor(vs)*gear + factor(am):carb + wt, mtcNA, weights = wdat))[2:3], tolerance = tol) # HD fixed effects and continuous variables and factor-continuous interactions + factor interactions expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcNA, mpg + hp + disp ~ factor(cyl):factor(vs) + factor(am):carb + wt, wdat, stub = FALSE, fill = TRUE), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl):factor(vs) + factor(am):carb + wt, mtcNA, weights = wdat))[2:3], tolerance = tol) # HD fixed effects and continuous variables and polynomaial interactions if(failtests) expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcNA, mpg + hp + disp ~ factor(cyl) + factor(vs):poly(gear,2) + factor(am):carb + wt, wdat, stub = FALSE, fill = TRUE), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl) + factor(vs):poly(gear,2) + factor(am):carb + wt, mtcNA, weights = wdat))[2:3], tolerance = tol) # 3-way interaction continuous-factor expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcNA, mpg + hp + disp ~ factor(cyl):vs:gear + factor(am):carb + wt, wdat, stub = FALSE, fill = TRUE), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl):vs:gear + factor(am):carb + wt, mtcNA, weights = wdat))[2:3]) # 3-way interaction factor-continuous expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcNA, mpg + hp + disp ~ factor(cyl):factor(vs):gear + factor(am):carb + wt, wdat, stub = FALSE, fill = TRUE), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl):factor(vs):gear + factor(am):carb + wt, mtcNA, weights = wdat))[2:3], tolerance = tol) }) test_that("HDW weighted computations work like lm", { # ... if(failtests) expect_equal( unname(resid(lm(mpg ~ factor(cyl)*carb + factor(vs) + hp + gear, weights = wt, mtcars))), HDW(mtcars, mpg ~ factor(cyl)*carb + factor(vs) + hp + gear, mtcars$wt)[, 1], tolerance = 1e-4) if(failtests) expect_equal( unname(resid(lm(mpg ~ factor(cyl)*carb + factor(vs) + hp + gear, mtcars))), HDW(mtcars, mpg ~ factor(cyl)*carb + factor(vs) + hp + gear, lm.method = "qr")[, 1], tolerance = 1e-4) expect_equal( unname(resid(lm(mpg ~ factor(vs) + hp + gear, weights = wt, mtcars))), HDW(mtcars, mpg ~ factor(vs) + hp + gear, mtcars$wt)[, 1], tolerance = 1e-4) expect_equal( unname(resid(lm(mpg ~ factor(cyl) + factor(vs) + hp + gear, weights = wt, mtcars))), HDW(mtcars, mpg ~ factor(cyl) + factor(vs) + hp + gear, mtcars$wt)[, 1], tolerance = 1e-4) expect_equal( unname(resid(lm(mpg ~ hp + gear, weights = wt, mtcars))), HDW(mtcars, mpg ~ hp + gear, mtcars$wt)[, 1], tolerance = 1e-4) }) } test_that("HDB data.frame method (formula input) throw errors", { expect_error(HDB(mtcars, ~ cyl + vs1)) expect_error(HDB(mtcars, mpg1 + hp ~ cyl + vs)) expect_error(HDB(mtcars, ~ cyl + vs, cols = 13)) expect_error(HDB(mtcars, ~ cyl + vs, cols = "mpg2")) }) test_that("HDW data.frame method (formula input) throw errors", { expect_error(HDW(mtcars, ~ cyl + vs1)) expect_error(HDW(mtcars, mpg1 + hp ~ cyl + vs)) expect_error(HDW(mtcars, ~ cyl + vs, cols = 13)) expect_error(HDW(mtcars, ~ cyl + vs, cols = "mpg2")) }) options(warn = 1) collapse/tests/testthat/test-fmode.R0000644000176200001440000007262414167354141017271 0ustar liggesuserscontext("fmode") # rm(list = ls()) set.seed(101) x <- round(abs(10*rnorm(100))) w <- as.integer(round(abs(10*rnorm(100)))) # round(abs(rnorm(100)), 1) -> Numeric precision issues in R xNA <- x wNA <- w xNA[sample.int(100,20)] <- NA wNA[sample.int(100,20)] <- NA f <- as.factor(sample.int(10, 100, TRUE)) data <- wlddev[wlddev$iso3c %in% c("BLZ","IND","USA","SRB","GRL"), ] l <- nrow(data) g <- GRP(droplevels(data$iso3c)) gf <- as_factor_GRP(g) dataNA <- na_insert(data) m <- as.matrix(num_vars(data)) # without num_vars also works for ties = "first" mNA <- as.matrix(num_vars(dataNA)) wdat <- as.integer(round(10*abs(rnorm(l)))) # round(abs(rnorm(l)), 1) -> Numeric precision issues in R wdatNA <- wdat wdatNA[sample.int(l, floor(l/5))] <- NA ncv <- !char_vars(data, "logical") getdata <- function(first) if(first) data else gv(data, ncv) getdataNA <- function(first) if(first) dataNA else gv(dataNA, ncv) # seteltNA <- function(x,i,j) { # x[i,j] <- NA # x # } whichmax <- function(x) which(as.integer(x) == as.integer(max(x))) # This solves numeric precision issues minwa <- function(x) { xna <- unattrib(x) if(anyNA(xna)) { if(is.integer(xna)) return(`attributes<-`(NA_integer_, attributes(x))) # if(is.character(xna)) return(`attributes<-`(NA_character_, attributes(x))) if(is.numeric(xna)) { xna <- na_rm(xna) if(!length(xna)) return(`attributes<-`(NA_real_, attributes(x))) } } `attributes<-`(`storage.mode<-`(base::min(xna), storage.mode(x)), attributes(x)) } maxwa <- function(x) { xna <- unattrib(x) if(is.numeric(xna) && anyNA(xna)) { xna <- na_rm(xna) if(!length(xna)) return(`attributes<-`(NA_real_, attributes(x))) } `attributes<-`(`storage.mode<-`(base::max(xna), storage.mode(x)), attributes(x)) } if(identical(Sys.getenv("NCRAN"), "TRUE")) { # This is to fool very silly checks on CRAN scanning the code of the tests rowidv <- eval(parse(text = paste0("data.table", ":", ":", "rowidv"))) # firstmode <- function(x) { # ox <- sort(x) # ox[which.max(rowidv(ox))] # } unam <- function(x) `names<-`(x, NULL) Mode <- function(x, na.rm = FALSE, ties = "first") { if(na.rm) { miss <- is.na(x) if(all(miss)) return(x[1L]) x <- x[!miss] } o <- radixorder(x) ox <- unam(x)[o] switch(ties, first = unam(x)[which.max(rowidv(ox)[radixorder(o)])], min = minwa(ox[whichmax(rowidv(ox))]), max = maxwa(ox[whichmax(rowidv(ox))]), stop("Unknown ties option")) } } # Mode <- function(x, na.rm = FALSE, ties = "first") { # if(na.rm) x <- x[!is.na(x)] # ux <- unique(x) # switch(ties, # first = ux[which.max(tabulate(match(x, ux)))], # min = minwa(ux[whichmax(tabulate(match(x, ux)))]), # max = maxwa(ux[whichmax(tabulate(match(x, ux)))]), # stop("Unknown ties option")) # } wMode <- function(x, w, na.rm = FALSE, ties = "first") { ax <- attributes(x) cc <- complete.cases(x, w) if(!any(cc)) return(`storage.mode<-`(NA, storage.mode(x))) if(na.rm) { w <- w[cc] x <- x[cc] } g <- GRP.default(x, call = FALSE) switch(ties, first = { g <- as_factor_GRP(g) o <- radixorder(unlist(split.default(seq_along(w), g), use.names = FALSE)) sw <- unlist(lapply(split.default(w, g), base::cumsum), use.names = FALSE)[o] fsubset.default(x, which.max(sw)) }, min = minwa(fsubset.default(g[["groups"]][[1L]], whichmax(fsum.default(w, g, use.g.names = FALSE)))), max = maxwa(fsubset.default(g[["groups"]][[1L]], whichmax(fsum.default(w, g, use.g.names = FALSE)))), stop("Unknown ties option")) # storage.mode(res) <- storage.mode(x) # `attributes<-`(res, ax) } wBY <- function(x, f, FUN, w, ...) { if(is.atomic(x) && !is.array(x)) return(mapply(FUN, split(x, f), split(w, f), ...)) wspl <- split(w, f) if(is.atomic(x)) return(dapply(x, function(xi) mapply(FUN, split(xi, f), wspl, ...))) dapply(dapply(x, function(xi) { r <- Map(FUN, split(xi, f), wspl, ...) if(is_date(xi)) do.call(c, r) else unlist(r) }), `names<-`, NULL) } if(identical(Sys.getenv("NCRAN"), "TRUE")) { test_that("fmode performs like Mode (defined above)", { for(t in c("first","min","max")) { # print(t) tf <- t == "first" expect_equal(fmode(NA, ties = t), Mode(NA, ties = t)) expect_equal(fmode(NA, na.rm = FALSE, ties = t), Mode(NA, ties = t)) expect_equal(fmode(1, ties = t), Mode(1, na.rm = TRUE, ties = t)) expect_equal(fmode(1:3, ties = t), Mode(1:3, na.rm = TRUE, ties = t)) expect_equal(fmode(-1:1, ties = t), Mode(-1:1, na.rm = TRUE, ties = t)) expect_equal(fmode(1, na.rm = FALSE, ties = t), Mode(1, ties = t)) expect_equal(fmode(1:3, na.rm = FALSE, ties = t), Mode(1:3, ties = t)) expect_equal(fmode(-1:1, na.rm = FALSE, ties = t), Mode(-1:1, ties = t)) expect_equal(fmode(x, ties = t), Mode(x, na.rm = TRUE, ties = t)) expect_equal(fmode(x, na.rm = FALSE, ties = t), Mode(x, ties = t)) if(tf) expect_equal(fmode(xNA, na.rm = FALSE, ties = t), Mode(xNA, ties = t)) expect_equal(fmode(xNA, ties = t), Mode(xNA, na.rm = TRUE, ties = t)) # expect_equal(as.character(fmode(data, drop = FALSE)), fmode(m)) expect_equal(fmode(m, ties = t), dapply(m, Mode, na.rm = TRUE, ties = t)) expect_equal(fmode(m, na.rm = FALSE, ties = t), dapply(m, Mode, ties = t)) if(tf) expect_equal(fmode(mNA, na.rm = FALSE, ties = t), dapply(mNA, Mode, ties = t)) expect_equal(fmode(mNA, ties = t), dapply(mNA, Mode, na.rm = TRUE, ties = t)) expect_equal(fmode(getdata(tf), ties = t, drop = FALSE), dapply(getdata(tf), Mode, na.rm = TRUE, ties = t, drop = FALSE)) expect_equal(fmode(getdata(tf), na.rm = FALSE, ties = t, drop = FALSE), dapply(getdata(tf), Mode, ties = t, drop = FALSE)) if(tf) expect_equal(fmode(dataNA, na.rm = FALSE, ties = t, drop = FALSE), dapply(dataNA, Mode, ties = t, drop = FALSE)) expect_equal(fmode(getdataNA(tf), ties = t, drop = FALSE), dapply(getdataNA(tf), Mode, na.rm = TRUE, ties = t, drop = FALSE)) expect_equal(fmode(x, f, ties = t), BY(x, f, Mode, na.rm = TRUE, ties = t)) expect_equal(fmode(x, f, na.rm = FALSE, ties = t), BY(x, f, Mode, ties = t)) if(tf) expect_equal(fmode(xNA, f, na.rm = FALSE, ties = t), BY(xNA, f, Mode, ties = t)) expect_equal(fmode(xNA, f, ties = t), BY(xNA, f, Mode, na.rm = TRUE, ties = t)) expect_equal(fmode(m, g, ties = t), BY(m, g, Mode, na.rm = TRUE, ties = t)) expect_equal(fmode(m, g, na.rm = FALSE, ties = t), BY(m, g, Mode, ties = t)) if(tf) expect_equal(fmode(mNA, g, na.rm = FALSE), BY(mNA, g, Mode)) # Mode gives NA expect_equal(fmode(mNA, g, ties = t), BY(mNA, g, Mode, na.rm = TRUE, ties = t)) expect_equal(fmode(getdata(tf), g, ties = t), BY(getdata(tf), g, Mode, na.rm = TRUE, ties = t)) expect_equal(fmode(getdata(tf), g, na.rm = FALSE, ties = t), BY(getdata(tf), g, Mode, ties = t)) if(tf) expect_equal(fmode(dataNA, g, na.rm = FALSE), BY(dataNA, g, Mode)) # Mode gives NA expect_equal(fmode(getdataNA(tf), g, ties = t), BY(getdataNA(tf), g, Mode, na.rm = TRUE, ties = t)) } }) } test_that("fmode with weights performs as intended (unbiased)", { expect_equal(fmode(c(2,2,4,5,5,5)), fmode(c(2,4,5), w = c(2,1,3))) expect_equal(fmode(c(2,2,4,5,5,5), na.rm = FALSE), fmode(c(2,4,5), w = c(2,1,3), na.rm = FALSE)) expect_equal(fmode(c(2.456,2.456,4.123,5.009,5.009,5.009)), fmode(c(2.456,4.123,5.009), w = c(2,1,3))) expect_equal(fmode(c(2.456,2.456,4.123,5.009,5.009,5.009), na.rm = FALSE), fmode(c(2.456,4.123,5.009), w = c(2,1,3), na.rm = FALSE)) expect_equal(fmode(c(2,2,NA,5,5,5)), fmode(c(2,NA,5), w = c(2,1,3))) expect_equal(fmode(c(2,2,NA,5,5,5), na.rm = FALSE), fmode(c(2,NA,5), w = c(2,1,3), na.rm = FALSE)) expect_equal(fmode(c(2,2,NA,5,5,5)), fmode(c(2,4,5), w = c(2,NA,3))) expect_equal(fmode(c(2,2,NA,5,5,5), na.rm = FALSE), fmode(c(2,4,5), w = c(2,NA,3), na.rm = FALSE)) expect_equal(fmode(c(NA,NA,4.123,5.009,5.009,5.009)), fmode(c(NA,4.123,5.009), w = c(2,1,3))) expect_equal(fmode(c(NA,NA,4.123,5.009,5.009,5.009), na.rm = FALSE), fmode(c(NA,4.123,5.009), w = c(2,1,3), na.rm = FALSE)) expect_equal(fmode(c(NA,NA,4.123,5.009,5.009,5.009)), fmode(c(2.456,4.123,5.009), w = c(NA,1,3))) expect_equal(fmode(c(NA,NA,4.123,5.009,5.009,5.009), na.rm = FALSE), fmode(c(2.456,4.123,5.009), w = c(NA,1,3), na.rm = FALSE)) f <- as.factor(rep(1:2, each = 6)); fs <- as.factor(rep(1:2, each = 3)) v <- c(2,2,4,5,5,5,2,2,4,5,5,5); vs <- c(2,4,5,2,4,5); w <- c(2,1,3,2,1,3) v2 <- c(2.456,2.456,4.123,5.009,5.009,5.009,2.456,2.456,4.123,5.009,5.009,5.009); v2s <- c(2.456,4.123,5.009,2.456,4.123,5.009) expect_equal(fmode(v, f), fmode(vs, fs, w)) expect_equal(fmode(v, f, na.rm = FALSE), fmode(vs, fs, w, na.rm = FALSE)) expect_equal(fmode(v2, f), fmode(v2s, fs, w)) expect_equal(fmode(v2, f, na.rm = FALSE), fmode(v2s, fs, w, na.rm = FALSE)) v[c(3,9)] <- NA; vs[c(2,5)] <- NA expect_equal(fmode(v, f), fmode(vs, fs, w)) expect_equal(fmode(v, f, na.rm = FALSE), fmode(vs, fs, w, na.rm = FALSE)) vs[c(2,5)] <- 4; w[c(2,5)] <- NA expect_equal(fmode(v, f), fmode(vs, fs, w)) expect_equal(fmode(v, f, na.rm = FALSE), fmode(vs, fs, w, na.rm = FALSE)) w[c(2,5)] <- 1; v2[c(1:2,7:8)] <- NA; v2s[c(1,4)] <- NA expect_equal(fmode(v2, f), fmode(v2s, fs, w)) expect_equal(fmode(v2, f, na.rm = FALSE), fmode(v2s, fs, w, na.rm = FALSE)) v2s[c(1,4)] <- 2.456; w[c(1,4)] <- NA expect_equal(fmode(v2, f), fmode(v2s, fs, w)) expect_equal(fmode(v2, f, na.rm = FALSE), fmode(v2s, fs, w, na.rm = FALSE)) }) test_that("fmode performs like fmode with weights all equal", { for(t in c("first","min","max")) { expect_equal(fmode(NA, ties = t), fmode(NA, w = 0.9, ties = t)) expect_equal(fmode(NA, na.rm = FALSE, ties = t), fmode(NA, w = 2.946, na.rm = FALSE, ties = t)) expect_equal(fmode(1, ties = t), fmode(1, w = 3, ties = t)) expect_equal(fmode(1:3, ties = t), fmode(1:3, w = rep(0.9,3), ties = t)) expect_equal(fmode(-1:1, ties = t), fmode(-1:1, w = rep(4.2,3), ties = t)) expect_equal(fmode(1, na.rm = FALSE, ties = t), fmode(1, w = 5, na.rm = FALSE, ties = t)) expect_equal(fmode(1:3, na.rm = FALSE, ties = t), fmode(1:3, w = rep(1.4, 3), na.rm = FALSE, ties = t)) expect_equal(fmode(-1:1, na.rm = FALSE, ties = t), fmode(-1:1, w = rep(1.4, 3), na.rm = FALSE, ties = t)) # expect_equal(fmode(x, ties = t), fmode(x, w = rep(1,100), ties = t)) # expect_equal(fmode(x, na.rm = FALSE, ties = t), fmode(x, w = rep(1.4, 100), na.rm = FALSE, ties = t)) # failed on patched solaris... # expect_equal(fmode(xNA, na.rm = FALSE, ties = t), fmode(xNA, w = rep(4.6, 100), na.rm = FALSE, ties = t)) # expect_equal(fmode(xNA, ties = t), fmode(xNA, w = rep(4.6, 100), ties = t)) # failed on patched solaris... expect_equal(fmode(m, ties = t), fmode(m, w = rep(6587, l), ties = t)) expect_equal(fmode(m, na.rm = FALSE, ties = t), fmode(m, w = rep(6587, l), na.rm = FALSE, ties = t)) expect_equal(fmode(mNA, na.rm = FALSE, ties = t), fmode(mNA, w = rep(6587, l), na.rm = FALSE, ties = t)) expect_equal(fmode(mNA, ties = t), fmode(mNA, w = rep(6587, l), ties = t)) expect_equal(fmode(data, ties = t), fmode(data, w = rep(6787, l), ties = t)) expect_equal(fmode(data, na.rm = FALSE, ties = t), fmode(data, w = rep(6787, l), na.rm = FALSE, ties = t)) expect_equal(fmode(dataNA, na.rm = FALSE, ties = t), fmode(dataNA, w = rep(6787, l), na.rm = FALSE, ties = t)) expect_equal(fmode(dataNA, ties = t), fmode(dataNA, w = rep(6787, l), ties = t)) expect_equal(fmode(x, f, ties = t), fmode(x, f, rep(546,100), ties = t)) expect_equal(fmode(x, f, na.rm = FALSE, ties = t), fmode(x, f, rep(5,100), na.rm = FALSE, ties = t)) # expect_equal(fmode(xNA, f, na.rm = FALSE, ties = t), fmode(xNA, f, rep(52.7,100), na.rm = FALSE, ties = t)) # Failed sometimes for some reason... v. 1.5.1 error expect_equal(fmode(xNA, f, ties = t), fmode(xNA, f, rep(599,100), ties = t)) expect_equal(fmode(m, g, ties = t), fmode(m, g, rep(546,l), ties = t)) expect_equal(fmode(m, g, na.rm = FALSE, ties = t), fmode(m, g, rep(1,l), na.rm = FALSE, ties = t)) expect_equal(fmode(mNA, g, na.rm = FALSE, ties = t), fmode(mNA, g, rep(7,l), na.rm = FALSE, ties = t)) expect_equal(fmode(mNA, g, ties = t), fmode(mNA, g, rep(1,l), ties = t)) expect_equal(fmode(data, g, ties = t), fmode(data, g, rep(53,l), ties = t)) expect_equal(fmode(data, g, na.rm = FALSE, ties = t), fmode(data, g, rep(546,l), na.rm = FALSE, ties = t)) expect_equal(fmode(dataNA, g, na.rm = FALSE, ties = t), fmode(dataNA, g, rep(1,l), na.rm = FALSE, ties = t)) # rep(0.999999,l) failed CRAN Arch i386 expect_equal(fmode(dataNA, g, ties = t), fmode(dataNA, g, rep(999,l), ties = t)) # rep(999.9999,l) failed CRAN Arch i386 } }) test_that("fmode with weights performs like wMode (defined above)", { for(t in c("first","min","max")) { # print(t) tf <- t == "first" # complete weights expect_equal(fmode(NA, w = 1, ties = t), wMode(NA, 1, ties = t)) expect_equal(fmode(NA, w = 1, na.rm = FALSE, ties = t), wMode(NA, 1, ties = t)) expect_equal(fmode(1, w = 1, ties = t), wMode(1, w = 1, ties = t)) expect_equal(fmode(1:3, w = 1:3, ties = t), wMode(1:3, 1:3, ties = t)) expect_equal(fmode(-1:1, w = 1:3, ties = t), wMode(-1:1, 1:3, ties = t)) expect_equal(fmode(1, w = 1, na.rm = FALSE, ties = t), wMode(1, 1, ties = t)) expect_equal(fmode(1:3, w = c(0.99,3454,1.111), na.rm = FALSE, ties = t), wMode(1:3, c(0.99,3454,1.111), ties = t)) expect_equal(fmode(-1:1, w = 1:3, na.rm = FALSE, ties = t), wMode(-1:1, 1:3, ties = t)) expect_equal(fmode(x, w = w, ties = t), wMode(x, w, ties = t)) expect_equal(fmode(x, w = w, na.rm = FALSE, ties = t), wMode(x, w, ties = t)) if(tf) expect_equal(fmode(xNA, w = w, na.rm = FALSE, ties = t), wMode(xNA, w, ties = t)) expect_equal(fmode(xNA, w = w, ties = t), wMode(xNA, w, na.rm = TRUE, ties = t)) # expect_equal(fmode(data, w = wdat, drop = FALSE, ties = t), fmode(m, w = wdat, ties = t)) expect_equal(fmode(m, w = wdat, ties = t), dapply(m, wMode, wdat, na.rm = TRUE, ties = t)) expect_equal(fmode(m, w = wdat, na.rm = FALSE, ties = t), dapply(m, wMode, wdat, ties = t)) if(tf) expect_equal(fmode(mNA, w = wdat, na.rm = FALSE, ties = t), dapply(mNA, wMode, wdat, ties = t)) expect_equal(fmode(mNA, w = wdat, ties = t), dapply(mNA, wMode, wdat, na.rm = TRUE, ties = t)) expect_equal(fmode(getdata(tf), w = wdat, drop = FALSE, ties = t), dapply(getdata(tf), wMode, wdat, na.rm = TRUE, drop = FALSE, ties = t)) expect_equal(fmode(getdata(tf), w = wdat, na.rm = FALSE, drop = FALSE, ties = t), dapply(getdata(tf), wMode, wdat, drop = FALSE, ties = t)) if(tf) expect_equal(fmode(dataNA, w = wdat, na.rm = FALSE, drop = FALSE, ties = t), dapply(dataNA, wMode, wdat, drop = FALSE, ties = t)) expect_equal(fmode(getdataNA(tf), w = wdat, drop = FALSE, ties = t), dapply(getdataNA(tf), wMode, wdat, na.rm = TRUE, drop = FALSE, ties = t)) expect_equal(fmode(x, f, w, ties = t), wBY(x, f, wMode, w, ties = t)) expect_equal(fmode(x, f, w, na.rm = FALSE, ties = t), wBY(x, f, wMode, w, ties = t)) if(tf) expect_equal(fmode(xNA, f, w, na.rm = FALSE, ties = t), wBY(xNA, f, wMode, w, ties = t)) expect_equal(fmode(xNA, f, w, ties = t), wBY(xNA, f, wMode, w, na.rm = TRUE, ties = t)) expect_equal(fmode(m, g, wdat, ties = t), wBY(m, gf, wMode, wdat, na.rm = TRUE, ties = t)) expect_equal(fmode(m, g, wdat, na.rm = FALSE, ties = t), wBY(m, gf, wMode, wdat, ties = t)) if(tf) expect_equal(fmode(mNA, g, wdat, na.rm = FALSE, ties = t), wBY(mNA, gf, wMode, wdat, ties = t)) expect_equal(fmode(mNA, g, wdat, ties = t), wBY(mNA, gf, wMode, wdat, na.rm = TRUE, ties = t)) expect_equal(fmode(getdata(tf), g, wdat, ties = t), wBY(getdata(tf), gf, wMode, wdat, na.rm = TRUE, ties = t)) expect_equal(fmode(getdata(tf), g, wdat, na.rm = FALSE, ties = t), wBY(getdata(tf), gf, wMode, wdat, ties = t)) if(tf) expect_equal(fmode(dataNA, g, wdat, na.rm = FALSE, ties = t), wBY(dataNA, gf, wMode, wdat, ties = t)) expect_equal(fmode(getdataNA(tf), g, wdat, ties = t), wBY(getdataNA(tf), gf, wMode, wdat, na.rm = TRUE, ties = t)) # missing weights: # missing weights are summed : wsum is NA.... fmode does not properly deal with missing weights if na.rm = FALSE expect_equal(fmode(NA, w = NA, ties = t), wMode(NA, NA, ties = t)) # expect_equal(fmode(1, w = NA, ties = t), wMode(1, w = NA, ties = t)) expect_equal(fmode(1:3, w = c(NA,1:2), ties = t), wMode(1:3, c(NA,1:2), na.rm = TRUE, ties = t)) expect_equal(fmode(-1:1, w = c(NA,1:2), ties = t), wMode(-1:1, c(NA,1:2), na.rm = TRUE, ties = t)) expect_equal(fmode(x, w = wNA, ties = t), wMode(x, wNA, na.rm = TRUE, ties = t)) expect_equal(fmode(xNA, w = wNA, ties = t), wMode(xNA, wNA, na.rm = TRUE, ties = t)) # expect_equal(fmode(data, w = wdatNA, ties = t), fmode(m, w = wdatNA, ties = t)) expect_equal(fmode(m, w = wdatNA, ties = t), dapply(m, wMode, wdatNA, na.rm = TRUE, ties = t)) expect_equal(fmode(mNA, w = wdatNA, ties = t), dapply(mNA, wMode, wdatNA, na.rm = TRUE, ties = t)) expect_equal(fmode(getdata(tf), w = wdatNA, ties = t, drop = FALSE), dapply(getdata(tf), wMode, wdatNA, na.rm = TRUE, ties = t, drop = FALSE)) expect_equal(fmode(getdataNA(tf), w = wdatNA, ties = t, drop = FALSE), dapply(getdataNA(tf), wMode, wdatNA, na.rm = TRUE, ties = t, drop = FALSE)) # expect_equal(fmode(x, f, wNA, ties = t), wBY(x, f, wMode, wNA, na.rm = TRUE, ties = t)) # failed on MAC OSX # expect_equal(fmode(xNA, f, wNA, ties = t), wBY(xNA, f, wMode, wNA, na.rm = TRUE, ties = t)) # failed on mac OSX... expect_equal(fmode(m, g, wdatNA, ties = t), wBY(m, gf, wMode, wdatNA, na.rm = TRUE, ties = t)) expect_equal(fmode(mNA, g, wdatNA, ties = t), wBY(mNA, gf, wMode, wdatNA, na.rm = TRUE, ties = t)) expect_equal(fmode(getdata(tf), g, wdatNA, ties = t), wBY(getdata(tf), gf, wMode, wdatNA, na.rm = TRUE, ties = t)) expect_equal(fmode(getdataNA(tf), g, wdatNA, ties = t), wBY(getdataNA(tf), gf, wMode, wdatNA, na.rm = TRUE, ties = t)) } }) test_that("fmode performs numerically stable", { for(t in c("first","min","max")) { expect_true(all_obj_equal(replicate(50, fmode(1, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(NA, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(NA, na.rm = FALSE, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(x, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(x, na.rm = FALSE, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(xNA, na.rm = FALSE, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(xNA, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(m, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(m, na.rm = FALSE, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(mNA, na.rm = FALSE, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(mNA, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(data, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(data, na.rm = FALSE, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(dataNA, na.rm = FALSE, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(dataNA, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(x, f, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(x, f, na.rm = FALSE, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(xNA, f, na.rm = FALSE, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(xNA, f, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(m, g, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(m, g, na.rm = FALSE, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(mNA, g, na.rm = FALSE, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(mNA, g, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(data, g, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(data, g, na.rm = FALSE, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(dataNA, g, na.rm = FALSE, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(dataNA, g, ties = t), simplify = FALSE))) } }) test_that("fmode with complete weights performs numerically stable", { for(t in c("first","min","max")) { expect_true(all_obj_equal(replicate(50, fmode(1, w = 1, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(NA, w = 1, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(NA, w = 1, na.rm = FALSE, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(x, w = w, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(x, w = w, na.rm = FALSE, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(xNA, w = w, na.rm = FALSE, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(xNA, w = w, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(m, w = wdat, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(m, w = wdat, na.rm = FALSE, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(mNA, w = wdat, na.rm = FALSE, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(mNA, w = wdat, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(data, w = wdat, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(data, w = wdat, na.rm = FALSE, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(dataNA, w = wdat, na.rm = FALSE, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(dataNA, w = wdat, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(x, f, w, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(x, f, w, na.rm = FALSE, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(xNA, f, w, na.rm = FALSE, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(xNA, f, w, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(m, g, wdat, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(m, g, wdat, na.rm = FALSE, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(mNA, g, wdat, na.rm = FALSE, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(mNA, g, wdat, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(data, g, wdat, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(data, g, wdat, na.rm = FALSE, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(dataNA, g, wdat, na.rm = FALSE, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(dataNA, g, wdat, ties = t), simplify = FALSE))) } }) test_that("fmode with missing weights performs numerically stable", { for(t in c("first","min","max")) { expect_true(all_obj_equal(replicate(50, fmode(1, w = NA, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(NA, w = NA, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(NA, w = NA, na.rm = FALSE, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(x, w = wNA, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(x, w = wNA, na.rm = FALSE, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(xNA, w = wNA, na.rm = FALSE, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(xNA, w = wNA, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(m, w = wdatNA, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(m, w = wdatNA, na.rm = FALSE, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(mNA, w = wdatNA, na.rm = FALSE, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(mNA, w = wdatNA, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(data, w = wdatNA, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(data, w = wdatNA, na.rm = FALSE, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(dataNA, w = wdatNA, na.rm = FALSE, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(dataNA, w = wdatNA, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(x, f, wNA, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(x, f, wNA, na.rm = FALSE, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(xNA, f, wNA, na.rm = FALSE, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(xNA, f, wNA, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(m, g, wdatNA, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(m, g, wdatNA, na.rm = FALSE, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(mNA, g, wdatNA, na.rm = FALSE, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(mNA, g, wdatNA, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(data, g, wdatNA, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(data, g, wdatNA, na.rm = FALSE, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(dataNA, g, wdatNA, na.rm = FALSE, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(dataNA, g, wdatNA, ties = t), simplify = FALSE))) } }) test_that("fmode handles special values in the right way", { expect_equal(fmode(NA), NA) expect_equal(fmode(NaN), NaN) expect_equal(fmode(Inf), Inf) expect_equal(fmode(-Inf), -Inf) expect_equal(fmode(TRUE), TRUE) expect_equal(fmode(FALSE), FALSE) expect_equal(fmode(NA, na.rm = FALSE), NA) expect_equal(fmode(NaN, na.rm = FALSE), NaN) expect_equal(fmode(Inf, na.rm = FALSE), Inf) expect_equal(fmode(-Inf, na.rm = FALSE), -Inf) expect_equal(fmode(TRUE, na.rm = FALSE), TRUE) expect_equal(fmode(FALSE, na.rm = FALSE), FALSE) expect_equal(fmode(c(1,NA)), 1) expect_equal(fmode(c(1,NaN)), 1) expect_equal(fmode(c(1,Inf)), 1) expect_equal(fmode(c(1,-Inf)), 1) expect_equal(fmode(c(FALSE,TRUE)), TRUE) # ??????? expect_equal(fmode(c(FALSE,FALSE)), FALSE) expect_equal(fmode(c(1,Inf), na.rm = FALSE), 1) expect_equal(fmode(c(1,-Inf), na.rm = FALSE), 1) expect_equal(fmode(c(FALSE,TRUE), na.rm = FALSE), TRUE) # ?????? expect_equal(fmode(c(FALSE,FALSE), na.rm = FALSE), FALSE) }) test_that("fmode with weights handles special values in the right way", { expect_equal(fmode(NA, w = 1), NA) expect_equal(fmode(NaN, w = 1), NaN) expect_equal(fmode(Inf, w = 1), Inf) expect_equal(fmode(-Inf, w = 1), -Inf) expect_equal(fmode(TRUE, w = 1), TRUE) expect_equal(fmode(FALSE, w = 1), FALSE) expect_equal(fmode(NA, w = 1, na.rm = FALSE), NA) expect_equal(fmode(NaN, w = 1, na.rm = FALSE), NaN) expect_equal(fmode(Inf, w = 1, na.rm = FALSE), Inf) expect_equal(fmode(-Inf, w = 1, na.rm = FALSE), -Inf) expect_equal(fmode(TRUE, w = 1, na.rm = FALSE), TRUE) expect_equal(fmode(FALSE, w = 1, na.rm = FALSE), FALSE) expect_equal(fmode(NA, w = NA), NA) expect_equal(fmode(NaN, w = NA), NaN) expect_equal(fmode(Inf, w = NA), Inf) expect_equal(fmode(-Inf, w = NA), -Inf) expect_equal(fmode(TRUE, w = NA), NA) expect_equal(fmode(FALSE, w = NA), NA) expect_equal(fmode(NA, w = NA, na.rm = FALSE), NA) expect_equal(fmode(NaN, w = NA, na.rm = FALSE), NaN) expect_equal(fmode(Inf, w = NA, na.rm = FALSE), Inf) expect_equal(fmode(-Inf, w = NA, na.rm = FALSE), -Inf) expect_equal(fmode(TRUE, w = NA, na.rm = FALSE), NA) expect_equal(fmode(FALSE, w = NA, na.rm = FALSE), NA) expect_equal(fmode(1:3, w = c(1,Inf,3)), 2) expect_equal(fmode(1:3, w = c(1,-Inf,3)), 3) expect_equal(fmode(1:3, w = c(1,Inf,3), na.rm = FALSE), 2) expect_equal(fmode(1:3, w = c(1,-Inf,3), na.rm = FALSE), 3) }) test_that("fmode produces errors for wrong input", { expect_visible(fmode("a")) expect_visible(fmode(NA_character_)) expect_visible(fmode(mNA)) expect_error(fmode(mNA, f)) expect_error(fmode(1:2,1:3)) expect_error(fmode(m,1:31)) expect_error(fmode(data,1:31)) expect_error(fmode(data, w = 1:31)) expect_visible(fmode("a", w = 1)) expect_error(fmode(1:2, w = 1:3)) expect_visible(fmode(NA_character_, w = 1)) expect_visible(fmode(mNA, w = wdat)) expect_error(fmode(mNA, f, wdat)) expect_error(fmode(mNA, w = 1:33)) expect_error(fmode(1:2,1:2, 1:3)) expect_error(fmode(m,1:32,1:20)) expect_error(fmode(data,1:32,1:10)) expect_error(fmode(1:2, w = c("a","b"))) expect_visible(fmode(wlddev)) expect_visible(fmode(wlddev, w = wlddev$year, drop = FALSE)) expect_visible(fmode(wlddev, wlddev$iso3c)) expect_visible(fmode(wlddev, wlddev$iso3c, wlddev$year)) }) collapse/tests/testthat/test-fsubset-ftransform.R0000644000176200001440000001535314167345222022025 0ustar liggesuserscontext("fsubset and ftransform") # rm(list = ls()) set.seed(101) v <- na_insert(mtcars$mpg) m <- na_insert(as.matrix(mtcars)) test_that("fsubset works like base::subset for vectors and matrices", { expect_equal(fsubset(v, 1:3), v[1:3]) expect_equal(fsubset(v, 4:8), v[4:8]) expect_error(fsubset(v, -(1:3))) # This does not work !! expect_equal(fsubset(v, v > 16), v[v > 16 & !is.na(v)]) expect_equal(fsubset(m, 1:3), m[1:3, ]) expect_equal(fsubset(m, v > 16), m[v > 16, ]) expect_equal(fsubset(m, -(4:8)), m[-(4:8), ]) expect_equal(fsubset(m, -(4:8), 1:5), m[-(4:8), 1:5]) expect_equal(fsubset(m, v > 16 & !is.na(v), mpg:vs), subset(m, v > 16 & !is.na(v), mpg:vs)) expect_equal(fsubset(m, v > 16 & !is.na(v), mpg, cyl:vs), subset(m, v > 16 & !is.na(v), c(mpg, cyl:vs))) expect_equal(fsubset(m, v > 16 & !is.na(v), -mpg), subset(m, v > 16 & !is.na(v), -mpg)) expect_equal(fsubset(m, v > 16 & !is.na(v), -(mpg:vs)), subset(m, v > 16 & !is.na(v), -(mpg:vs))) }) test_that("fsubset works like base::subset for data frames", { expect_equal(unattrib(fsubset(airquality, Ozone > 42)), unattrib(subset(airquality, Ozone > 42))) expect_equal(unattrib(fsubset(airquality, Temp > 80, Ozone, Temp)), unattrib(subset(airquality, Temp > 80, select = c(Ozone, Temp)))) expect_equal(unattrib(fsubset(airquality, Day == 1, -Temp)), unattrib(subset(airquality, Day == 1, select = -Temp))) expect_equal(unattrib(fsubset(airquality, Day == 1, -(Day:Temp))), unattrib(subset(airquality, Day == 1, -(Day:Temp)))) expect_equal(unattrib(fsubset(airquality, Day == 1, Ozone:Wind)), unattrib(subset(airquality, Day == 1, Ozone:Wind))) expect_equal(unattrib(fsubset(airquality, Day == 1 & !is.na(Ozone), Ozone:Wind, Month)), unattrib(subset(airquality, Day == 1 & !is.na(Ozone), c(Ozone:Wind, Month)))) }) test_that("fsubset column renaming", { expect_equal(names(fsubset(airquality, Temp > 90, OZ = Ozone, Temp)), .c(OZ, Temp)) expect_equal(names(fsubset(mtcars, cyl == 4, bla = cyl)), "bla") }) test_that("ss works like an improved version of [", { # replaced setRownames wit unattrib because of unexplained test failures on some systems expect_equal(ss(airquality, 1:100, 1:3), airquality[1:100, 1:3]) expect_equal(unattrib(ss(airquality, -(1:100), 1:3)), unattrib(airquality[-(1:100), 1:3])) expect_equal(ss(airquality, 1:100, -(1:3)), airquality[1:100, -(1:3)]) expect_equal(unattrib(ss(airquality, -(1:100), -(1:3))), unattrib(airquality[-(1:100), -(1:3)])) nam <- names(airquality)[2:5] set.seed(101) v <- sample.int(fnrow(airquality), 100) expect_equal(unattrib(ss(airquality, v, nam)), unattrib(airquality[v, nam, drop = FALSE])) expect_equal(unattrib(ss(airquality, -v, nam)), unattrib(airquality[-v, nam, drop = FALSE])) set.seed(101) vl <- sample(c(TRUE, FALSE), fnrow(airquality), replace = TRUE) cl <- sample(c(TRUE, FALSE), fncol(airquality), replace = TRUE) expect_equal(unattrib(ss(airquality, vl, nam)), unattrib(airquality[vl, nam, drop = FALSE])) expect_equal(unattrib(ss(airquality, vl, cl)), unattrib(airquality[vl, cl, drop = FALSE])) set.seed(101) vl <- na_insert(vl) cl[4L] <- NA expect_equal(unattrib(ss(airquality, vl, nam)), unattrib(airquality[vl & !is.na(vl), nam, drop = FALSE])) expect_equal(unattrib(ss(airquality, vl, cl)), unattrib(airquality[vl & !is.na(vl), cl & !is.na(cl), drop = FALSE])) }) test_that("ftransform works like base::transform", { expect_equal(ftransform(airquality, Ozone = -Ozone), transform(airquality, Ozone = -Ozone)) expect_equal(ftransform(airquality, new = Ozone / Wind * 100), transform(airquality, new = Ozone / Wind * 100)) expect_equal(ftransform(airquality, new = -Ozone, Temp = (Temp-32)/1.8), transform(airquality, new = -Ozone, Temp = (Temp-32)/1.8)) expect_equal(ftransform(airquality, new = -Ozone, new2 = 1, Temp = NULL), transform(airquality, new = -Ozone, new2 = 1, Temp = NULL)) expect_equal(ftransform(airquality, Ozone = NULL, Temp = NULL), transform(airquality, Ozone = NULL, Temp = NULL)) }) test_that("fcompute works well", { expect_equal(fcompute(airquality, new = -Ozone, new2 = 1, keep = 1:3), ftransform(airquality[1:3], new = -Ozone, new2 = 1)) expect_equal(names(fcompute(airquality, new = -Ozone, new2 = 1, keep = 1:3)), .c(Ozone, Solar.R, Wind, new, new2)) expect_equal(names(fcompute(airquality, new = -Ozone, new2 = 1)), .c(new, new2)) expect_equal(names(fcompute(airquality, Ozone = -Ozone, new = 1, keep = 1:3)), .c(Ozone, Solar.R, Wind, new)) }) test_that("fcomputev works well", { expect_equal(fcomputev(iris, is.numeric, log), dapply(nv(iris), log)) expect_equal(fcomputev(iris, is.numeric, fcumsum, apply = FALSE), fcumsum(nv(iris))) expect_equal(fcomputev(iris, is.numeric, `/`, Sepal.Length), nv(iris) %c/% iris$Sepal.Length) expect_equal(fcomputev(iris, is.numeric, fmean, Species, TRA = "replace", apply = FALSE), fmean(nv(iris), iris$Species, TRA = "replace")) expect_equal(fcomputev(iris, is.numeric, log, keep = "Species"), colorder(ftransformv(iris, is.numeric, log), Species)) expect_equal(fcomputev(iris, is.numeric, log, keep = names(iris)), ftransformv(iris, is.numeric, log)) expect_equal(fcomputev(iris, is.numeric, fcumsum, apply = FALSE, keep = "Species"), colorder(ftransformv(iris, is.numeric, fcumsum, apply = FALSE), Species)) expect_equal(fcomputev(iris, is.numeric, fcumsum, apply = FALSE, keep = names(iris)), ftransformv(iris, is.numeric, fcumsum, apply = FALSE)) expect_equal(fcomputev(iris, is.numeric, `/`, Sepal.Length, keep = "Species"), colorder(ftransformv(iris, is.numeric, `/`, Sepal.Length), Species)) expect_equal(fcomputev(iris, is.numeric, `/`, Sepal.Length, keep = names(iris)), ftransformv(iris, is.numeric, `/`, Sepal.Length)) expect_equal(fcomputev(iris, is.numeric, fmean, Species, TRA = "replace", apply = FALSE, keep = "Species"), colorder(ftransformv(iris, is.numeric, fmean, Species, TRA = "replace", apply = FALSE), Species)) expect_equal(fcomputev(iris, is.numeric, fmean, Species, TRA = "replace", apply = FALSE, keep = names(iris)), ftransformv(iris, is.numeric, fmean, Species, TRA = "replace", apply = FALSE)) }) # Still do wrong input... test_that("fsubset error for wrong input", { # expect_error(fsubset(mtcars, mpg)) expect_warning(fsubset(mtcars, mpg:cyl)) expect_error(fsubset(mtcars, "mpg")) expect_error(fsubset(mtcars, TRUE)) expect_error(fsubset(mtcars, mpg > 15, cyl < 4)) expect_error(fsubset(mtcars, mpg > 15, TRUE)) expect_error(fsubset(mtcars, mpg > 15, 35)) expect_error(fsubset(mtcars, mpg > 15, ~mpg)) }) collapse/tests/testthat/test-GRP.R0000644000176200001440000006667714174223734016643 0ustar liggesuserscontext("radixorder, GRP, qF, qG") # rm(list = ls()) set.seed(101) mtcNA <- na_insert(mtcars) wlddev2 <- slt(wlddev, -date) num_vars(wlddev2) <- round(num_vars(wlddev2), 8) wldNA <- na_insert(wlddev2) GGDCNA <- na_insert(GGDC10S) unlab <- function(x) `attr<-`(x, "label", NULL) test_that("radixorder works like order(.., method = 'radix')", { # Ordering single variable expect_identical(lapply(wldNA, function(x) unattrib(radixorder(x))), lapply(wldNA, order, method = "radix")) expect_identical(lapply(wldNA, function(x) unattrib(radixorder(x, decreasing = TRUE))), lapply(wldNA, order, method = "radix", decreasing = TRUE)) expect_identical(lapply(wldNA, function(x) unattrib(radixorder(x, na.last = FALSE))), lapply(wldNA, order, method = "radix", na.last = FALSE)) expect_identical(lapply(wldNA, function(x) unattrib(radixorder(x, decreasing = TRUE, na.last = FALSE))), lapply(wldNA, order, method = "radix", decreasing = TRUE, na.last = FALSE)) expect_identical(lapply(wldNA, function(x) unattrib(radixorder(x, na.last = NA))), lapply(wldNA, order, method = "radix", na.last = NA)) expect_identical(lapply(wldNA, function(x) unattrib(radixorder(x, decreasing = TRUE, na.last = NA))), lapply(wldNA, order, method = "radix", decreasing = TRUE, na.last = NA)) # get starts expect_identical(lapply(wldNA, function(x) unattrib(radixorder(x, starts = TRUE))), lapply(wldNA, order, method = "radix")) expect_identical(lapply(wldNA, function(x) unattrib(radixorder(x, decreasing = TRUE, starts = TRUE))), lapply(wldNA, order, method = "radix", decreasing = TRUE)) expect_identical(lapply(wldNA, function(x) unattrib(radixorder(x, na.last = FALSE, starts = TRUE))), lapply(wldNA, order, method = "radix", na.last = FALSE)) expect_identical(lapply(wldNA, function(x) unattrib(radixorder(x, decreasing = TRUE, na.last = FALSE, starts = TRUE))), lapply(wldNA, order, method = "radix", decreasing = TRUE, na.last = FALSE)) # get group.sizes expect_identical(lapply(wldNA, function(x) unattrib(radixorder(x, group.sizes = TRUE))), lapply(wldNA, order, method = "radix")) expect_identical(lapply(wldNA, function(x) unattrib(radixorder(x, decreasing = TRUE, group.sizes = TRUE))), lapply(wldNA, order, method = "radix", decreasing = TRUE)) expect_identical(lapply(wldNA, function(x) unattrib(radixorder(x, na.last = FALSE, group.sizes = TRUE))), lapply(wldNA, order, method = "radix", na.last = FALSE)) expect_identical(lapply(wldNA, function(x) unattrib(radixorder(x, decreasing = TRUE, na.last = FALSE, group.sizes = TRUE))), lapply(wldNA, order, method = "radix", decreasing = TRUE, na.last = FALSE)) # get starts and group.sizes expect_identical(lapply(wldNA, function(x) unattrib(radixorder(x, starts = TRUE, group.sizes = TRUE))), lapply(wldNA, order, method = "radix")) expect_identical(lapply(wldNA, function(x) unattrib(radixorder(x, decreasing = TRUE, starts = TRUE, group.sizes = TRUE))), lapply(wldNA, order, method = "radix", decreasing = TRUE)) expect_identical(lapply(wldNA, function(x) unattrib(radixorder(x, na.last = FALSE, starts = TRUE, group.sizes = TRUE))), lapply(wldNA, order, method = "radix", na.last = FALSE)) expect_identical(lapply(wldNA, function(x) unattrib(radixorder(x, decreasing = TRUE, na.last = FALSE, starts = TRUE, group.sizes = TRUE))), lapply(wldNA, order, method = "radix", decreasing = TRUE, na.last = FALSE)) randcols <- function(n = 3) replicate(n, sample.int(11, sample.int(5, 1)), simplify = FALSE) order2 <- function(x, ...) do.call(order, c(gv(wldNA, x), list(...))) # Ordering by multiple variables rc <- randcols() expect_identical(lapply(rc, function(x) unattrib(radixorderv(gv(wldNA, x)))), lapply(rc, order2, method = "radix")) rc <- randcols() expect_identical(lapply(rc, function(x) unattrib(radixorderv(gv(wldNA, x), decreasing = TRUE))), lapply(rc, order2, method = "radix", decreasing = TRUE)) rc <- randcols() expect_identical(lapply(rc, function(x) unattrib(radixorderv(gv(wldNA, x), na.last = FALSE))), lapply(rc, order2, method = "radix", na.last = FALSE)) rc <- randcols() expect_identical(lapply(rc, function(x) unattrib(radixorderv(gv(wldNA, x), decreasing = TRUE, na.last = FALSE))), lapply(rc, order2, method = "radix", decreasing = TRUE, na.last = FALSE)) rc <- randcols() expect_identical(lapply(rc, function(x) unattrib(radixorderv(gv(wldNA, x), na.last = NA))), lapply(rc, order2, method = "radix", na.last = NA)) rc <- randcols() expect_identical(lapply(rc, function(x) unattrib(radixorderv(gv(wldNA, x), decreasing = TRUE, na.last = NA))), lapply(rc, order2, method = "radix", decreasing = TRUE, na.last = NA)) # get starts expect_identical(lapply(rc, function(x) unattrib(radixorderv(gv(wldNA, x), starts = TRUE))), lapply(rc, order2, method = "radix")) rc <- randcols() expect_identical(lapply(rc, function(x) unattrib(radixorderv(gv(wldNA, x), decreasing = TRUE, starts = TRUE))), lapply(rc, order2, method = "radix", decreasing = TRUE)) rc <- randcols() expect_identical(lapply(rc, function(x) unattrib(radixorderv(gv(wldNA, x), na.last = FALSE, starts = TRUE))), lapply(rc, order2, method = "radix", na.last = FALSE)) rc <- randcols() expect_identical(lapply(rc, function(x) unattrib(radixorderv(gv(wldNA, x), decreasing = TRUE, na.last = FALSE, starts = TRUE))), lapply(rc, order2, method = "radix", decreasing = TRUE, na.last = FALSE)) # get group.sizes expect_identical(lapply(rc, function(x) unattrib(radixorderv(gv(wldNA, x), group.sizes = TRUE))), lapply(rc, order2, method = "radix")) rc <- randcols() expect_identical(lapply(rc, function(x) unattrib(radixorderv(gv(wldNA, x), decreasing = TRUE, group.sizes = TRUE))), lapply(rc, order2, method = "radix", decreasing = TRUE)) rc <- randcols() expect_identical(lapply(rc, function(x) unattrib(radixorderv(gv(wldNA, x), na.last = FALSE, group.sizes = TRUE))), lapply(rc, order2, method = "radix", na.last = FALSE)) rc <- randcols() expect_identical(lapply(rc, function(x) unattrib(radixorderv(gv(wldNA, x), decreasing = TRUE, na.last = FALSE, group.sizes = TRUE))), lapply(rc, order2, method = "radix", decreasing = TRUE, na.last = FALSE)) # get starts and group.sizes expect_identical(lapply(rc, function(x) unattrib(radixorderv(gv(wldNA, x), starts = TRUE, group.sizes = TRUE))), lapply(rc, order2, method = "radix")) rc <- randcols() expect_identical(lapply(rc, function(x) unattrib(radixorderv(gv(wldNA, x), decreasing = TRUE, starts = TRUE, group.sizes = TRUE))), lapply(rc, order2, method = "radix", decreasing = TRUE)) rc <- randcols() expect_identical(lapply(rc, function(x) unattrib(radixorderv(gv(wldNA, x), na.last = FALSE, starts = TRUE, group.sizes = TRUE))), lapply(rc, order2, method = "radix", na.last = FALSE)) rc <- randcols() expect_identical(lapply(rc, function(x) unattrib(radixorderv(gv(wldNA, x), decreasing = TRUE, na.last = FALSE, starts = TRUE, group.sizes = TRUE))), lapply(rc, order2, method = "radix", decreasing = TRUE, na.last = FALSE)) }) test_that("GRP works as intended", { expect_visible(GRP(unname(as.list(mtcars)))) expect_visible(GRP(unname(as.list(mtcars)), 8:9)) expect_equal(GRPnames(GRP(mtcars$cyl)), c("4","6","8")) expect_equal(GRPnames(GRP(mtcars$cyl), FALSE), c(4, 6, 8)) expect_identical(GRPnames(GRP(mtcars$cyl, return.groups = FALSE)), NULL) expect_output(print(GRP(mtcars, ~ cyl + am))) expect_output(print(GRP(mtcars, ~ cyl + am, return.groups = FALSE))) # expect_invisible(plot(GRP(mtcars, ~ cyl + am))) expect_identical(GRP(GRP(mtcars$mpg)), GRP(mtcars$mpg)) expect_identical(GRP.default(GRP(mtcars$mpg)), GRP(mtcars$mpg)) expect_equal(GRP(mtcars$mpg)[[2]], unattrib(as.factor(mtcars$mpg))) expect_equal(GRP(mtcars$cyl)[[2]], unattrib(as.factor(mtcars$cyl))) expect_equal(GRP(wlddev2$country)[[2]], unattrib(as.factor(wlddev2$country))) expect_equal(GRP(wlddev2$PCGDP)[[2]], unattrib(factor(wlddev2$PCGDP, exclude = NULL))) expect_equal(GRP(mtcars$mpg)[[1]], attributes(qG(mtcars$mpg))[[1]]) expect_equal(GRP(mtcars$cyl)[[1]], attributes(qG(mtcars$cyl))[[1]]) expect_equal(GRP(wlddev2$country)[[1]], attributes(qG(wlddev2$country))[[1]]) expect_equal(GRP(wlddev2$PCGDP)[[1]], attributes(qG(wlddev2$PCGDP, na.exclude = FALSE))[[1]]) expect_equal(GRP(mtcars$mpg)[[4]][[1]], attributes(qG(mtcars$mpg, return.groups = TRUE))[[2]]) expect_equal(GRP(mtcars$cyl)[[4]][[1]], attributes(qG(mtcars$cyl, return.groups = TRUE))[[2]]) expect_equal(GRP(wlddev2$country)[[4]][[1]], attributes(qG(wlddev2$country, return.groups = TRUE))[[2]]) expect_equal(GRP(wlddev2$PCGDP)[[4]][[1]], attributes(qG(wlddev2$PCGDP, na.exclude = FALSE, return.groups = TRUE))[[2]]) expect_visible(GRP(1:10)) expect_visible(GRP(1:10, decreasing = TRUE)) expect_visible(GRP(mtcNA$mpg)) expect_visible(GRP(mtcNA$mpg, return.groups = FALSE)) expect_visible(GRP(mtcNA$mpg, return.groups = FALSE, return.order = TRUE)) expect_visible(GRP(mtcNA$mpg, na.last = FALSE)) expect_visible(GRP(mtcNA$mpg, na.last = FALSE, decreasing = TRUE)) expect_visible(GRP(mtcNA$mpg, na.last = FALSE, decreasing = TRUE, return.order = TRUE)) expect_visible(GRP(list(a = 1:3, b = 1:3))) expect_visible(GRP(mtcars)) expect_visible(GRP(mtcNA)) expect_visible(GRP(mtcNA, return.groups = FALSE)) expect_visible(GRP(mtcNA, return.groups = FALSE, return.order = TRUE)) expect_visible(GRP(mtcNA, na.last = FALSE)) expect_visible(GRP(mtcNA, na.last = FALSE, decreasing = TRUE)) expect_visible(GRP(mtcNA, na.last = FALSE, decreasing = TRUE, return.order = TRUE)) expect_visible(GRP(wlddev2)) expect_visible(GRP(wlddev2, return.groups = FALSE)) expect_true(all_obj_equal(GRP(mtcars, ~ cyl + vs + am)[1:7], GRP(mtcars, c("cyl","vs","am"))[1:7], GRP(mtcars, c(2,8:9))[1:7])) }) test_that("GRP gives errors for wrong input", { expect_error(GRP(mtcars$mpg, na.last = NA)) expect_error(GRP(~ bla)) expect_error(GRP(1:10, 1)) expect_error(GRP(1:10, ~ cyl)) expect_error(GRP(1:10, "cyl")) expect_error(GRP(mtcars, TRUE)) expect_error(GRP(mtcars, ~ cyl + bla)) expect_error(GRP(mtcars, c("bal","cyl"))) expect_error(GRP(mtcars, 11:12)) expect_error(GRP(list(a = 1:3, b = 1:4))) expect_visible(GRP(mtcars, ~ cyl + vs, order = -1L)) }) test_that("fgroup_by works as intended", { ca <- function(x) { nam <- names(x[[4L]]) attributes(x[[4L]]) <- NULL names(x[[4L]]) <- nam x } expect_output(print(fgroup_by(mtcars, cyl, vs, am))) expect_equal(GRP(fgroup_by(mtcars, cyl, vs, am)), ca(GRP(mtcars, ~ cyl + vs + am, call = FALSE))) expect_equal(GRP(fgroup_by(mtcars, c("cyl", "vs", "am"))), ca(GRP(mtcars, ~ cyl + vs + am, call = FALSE))) expect_equal(GRP(fgroup_by(mtcars, c(2, 8:9))), ca(GRP(mtcars, ~ cyl + vs + am, call = FALSE))) expect_identical(fungroup(fgroup_by(mtcars, cyl, vs, am)), mtcars) expect_equal(fgroup_by(fgroup_by(mtcars, cyl, vs, am), cyl), fgroup_by(mtcars, cyl)) # The issue is that GRP.grouped_df does not reclass the groups... take up another time. # This is to fool very silly checks on CRAN scanning the code of the tests # group_by <- eval(parse(text = paste0("dplyr", ":", ":", "group_by"))) # expect_equal(GRP(group_by(mtcars, cyl, vs, am), call = FALSE), GRP(as.list(mtcars), ~ cyl + vs + am, call = FALSE)) # expect_equal(GRP(group_by(mtcNA, cyl, vs, am)), GRP(mtcNA, ~ cyl + vs + am, call = NULL)) # expect_equal(GRP(group_by(GGDC10S, Variable, Country)), GRP(GGDC10S, ~ Variable + Country, call = FALSE)) # expect_equal(GRP(group_by(GGDCNA, Variable, Country)), GRP(GGDCNA, ~ Variable + Country, call = NULL)) # expect_equal(GRP(group_by(wlddev, region, year)), GRP(wlddev, ~ region + year, call = NULL)) # expect_equal(GRP(group_by(wldNA, region, year)), GRP(wldNA, ~ region + year, call = NULL)) }) gdat <- gby(GGDCNA, Variable, Country) test_that("fgroup_vars works as intended", { expect_identical(fgroup_vars(gdat), slt(GGDCNA, Variable, Country)) expect_identical(fgroup_vars(gdat, "unique"), funique(slt(GGDCNA, Variable, Country), sort = TRUE)) expect_identical(fgroup_vars(gdat, "names"), .c(Variable, Country)) expect_identical(fgroup_vars(gdat, "indices"), c(4L, 1L)) expect_identical(fgroup_vars(gdat, "named_indices"), setNames(c(4L, 1L), .c(Variable, Country))) expect_identical(fgroup_vars(gdat, "logical"), `[<-`(logical(fncol(GGDCNA)), c(4L, 1L), TRUE)) expect_identical(fgroup_vars(gdat, "named_logical"), setNames(`[<-`(logical(fncol(GGDCNA)), c(4L, 1L), TRUE), names(GGDC10S))) expect_error(fgroup_vars(gdat, "bla")) }) test_that("GRP <> factor conversions run seamlessly", { expect_identical(unclass(iris$Species), unclass(as_factor_GRP(GRP(iris$Species)))) # as_factor_GRP always adds class "na.included" expect_identical(unclass(wlddev$iso3c[1:200]), unclass(as_factor_GRP(GRP(wlddev$iso3c[1:200])))) # as_factor_GRP always adds class "na.included" expect_identical(unclass(fdroplevels(wlddev$iso3c[1:200])), unclass(as_factor_GRP(GRP(wlddev$iso3c[1:200], drop = TRUE)))) # as_factor_GRP always adds class "na.included" expect_identical(unclass(`vlabels<-`(wlddev2$iso3c, "label", NULL)), unclass(as_factor_GRP(GRP(wlddev2$iso3c)))) set.seed(101) int <- sample.int(10,100,TRUE) expect_identical(unclass(qF(int)), unclass(as_factor_GRP(GRP(int)))) expect_identical(unclass(qF(int)), unclass(as_factor_GRP(GRP(qF(int))))) intNA <- int set.seed(101) intNA[sample(100,20)] <- NA expect_identical(unclass(qF(intNA, na.exclude = FALSE)), unclass(as_factor_GRP(GRP(intNA)))) expect_identical(unclass(qF(intNA, na.exclude = FALSE)), unclass(as_factor_GRP(GRP(qF(intNA))))) dblNA <- as.double(intNA) expect_false(identical(unclass(qF(dblNA)), unclass(as_factor_GRP(GRP(dblNA))))) # qF with na.exclude = TRUE retains double NA's... expect_false(identical(unclass(qF(dblNA)), unclass(as_factor_GRP(GRP(qF(dblNA)))))) expect_identical(qF(dblNA, na.exclude = FALSE), as_factor_GRP(GRP(dblNA))) expect_identical(qF(dblNA, na.exclude = FALSE), as_factor_GRP(GRP(qF(dblNA)))) expect_identical(qF(dblNA, na.exclude = FALSE), as_factor_GRP(GRP(qF(dblNA, na.exclude = FALSE)))) }) # could also do qG to GRP, but qG is same as factor.. and is a programmers function anyway.. test_that("qF and qG work as intended", { af <- lapply(wlddev2, function(x) as.factor(x)) expect_equal(af[!fact_vars(wlddev2, "logical")], lapply(gv(wlddev2, !fact_vars(wlddev2, "logical")), function(x) unlab(qF(x, method = "radix")))) expect_equal(af[!fact_vars(wlddev2, "logical")], lapply(gv(wlddev2, !fact_vars(wlddev2, "logical")), function(x) unlab(qF(x, method = "hash")))) af <- lapply(af, unattrib) expect_identical(af, lapply(wlddev2, function(x) unattrib(qF(x, method = "radix")))) expect_identical(af, lapply(wlddev2, function(x) unattrib(qF(x, method = "hash")))) expect_identical(af, lapply(wlddev2, function(x) unattrib(qG(x, method = "radix")))) expect_identical(af, lapply(wlddev2, function(x) unattrib(qG(x, method = "hash")))) afNA <- lapply(wldNA, function(x) as.factor(x)) expect_equal(afNA[!fact_vars(wlddev2, "logical")], lapply(gv(wldNA, !fact_vars(wlddev2, "logical")), function(x) unlab(qF(x, method = "radix")))) expect_equal(afNA[!fact_vars(wlddev2, "logical")], lapply(gv(wldNA, !fact_vars(wlddev2, "logical")), function(x) unlab(qF(x, method = "hash")))) afNA <- lapply(afNA, unattrib) expect_identical(afNA, lapply(wldNA, function(x) unattrib(qF(x, method = "radix")))) expect_identical(afNA, lapply(wldNA, function(x) unattrib(qF(x, method = "hash")))) expect_identical(afNA, lapply(wldNA, function(x) unattrib(qG(x, method = "radix")))) expect_identical(afNA, lapply(wldNA, function(x) unattrib(qG(x, method = "hash")))) afnoNA <- lapply(wldNA, function(x) factor(x, exclude = NULL)) expect_equal(lapply(afnoNA[!fact_vars(wlddev2, "logical")], unclass), lapply(gv(wldNA, !fact_vars(wlddev2, "logical")), function(x) unclass(unlab(qF(x, method = "radix", na.exclude = FALSE))))) expect_equal(lapply(afnoNA[!fact_vars(wlddev2, "logical")], unclass), lapply(gv(wldNA, !fact_vars(wlddev2, "logical")), function(x) unclass(unlab(qF(x, method = "hash", na.exclude = FALSE))))) afnoNA <- lapply(afnoNA, unattrib) expect_identical(afnoNA, lapply(wldNA, function(x) unattrib(qF(x, method = "radix", na.exclude = FALSE)))) expect_identical(afnoNA, lapply(wldNA, function(x) unattrib(qF(x, method = "hash", na.exclude = FALSE)))) expect_identical(afnoNA, lapply(wldNA, function(x) unattrib(qG(x, method = "radix", na.exclude = FALSE)))) expect_identical(afnoNA, lapply(wldNA, function(x) unattrib(qG(x, method = "hash", na.exclude = FALSE)))) countryf <- as.factor(wlddev2$country) expect_identical(countryf, unlab(qF(wlddev2$country))) expect_identical(countryf, unlab(qF(wlddev2$country, method = "radix"))) expect_identical(countryf, unlab(qF(wlddev2$country, method = "hash"))) # identical(as.factor(wlddev2$iso3c), wlddev2$iso3c) expect_identical(levels(wlddev2$iso3c), levels(unlab(qF(wlddev2$iso3c)))) expect_identical(unattrib(wlddev2$iso3c), unattrib(unlab(qF(wlddev2$iso3c)))) expect_identical(class(wlddev2$iso3c), class(unlab(qF(wlddev2$iso3c)))) expect_equal(lapply(wlddev2, function(x) qF(x, method = "radix")), lapply(wlddev2, function(x) qF(x, method = "hash"))) expect_equal(lapply(wldNA, function(x) qF(x, method = "radix")), lapply(wldNA, function(x) qF(x, method = "hash"))) expect_equal(lapply(wlddev2, function(x) qG(x, method = "radix")), lapply(wlddev2, function(x) qG(x, method = "hash"))) expect_equal(lapply(wldNA, function(x) qG(x, method = "radix")), lapply(wldNA, function(x) qG(x, method = "hash"))) expect_equal(lapply(wlddev2, function(x) qF(x, method = "radix", na.exclude = FALSE)), lapply(wlddev2, function(x) qF(x, method = "hash", na.exclude = FALSE))) expect_equal(lapply(wldNA, function(x) qF(x, method = "radix", na.exclude = FALSE)), lapply(wldNA, function(x) qF(x, method = "hash", na.exclude = FALSE))) expect_equal(lapply(wlddev2, function(x) qG(x, method = "radix", na.exclude = FALSE)), lapply(wlddev2, function(x) qG(x, method = "hash", na.exclude = FALSE))) expect_equal(lapply(wldNA, function(x) qG(x, method = "radix", na.exclude = FALSE)), lapply(wldNA, function(x) qG(x, method = "hash", na.exclude = FALSE))) # Testing reordering of factor levels expect_identical(qF(wlddev$iso3c), wlddev$iso3c) riso3 <- rev(wlddev$iso3c) expect_identical(qF(riso3), riso3) expect_identical(qF(riso3, sort = FALSE), factor(riso3, levels = funique(riso3))) iso3na <- na_insert(wlddev$iso3c) expect_identical(qF(iso3na), iso3na) expect_identical(unclass(qF(iso3na, na.exclude = FALSE, keep.attr = FALSE)), unclass(addNA(iso3na))) riso3na <- na_insert(riso3) expect_identical(qF(riso3na), riso3na) expect_identical(unclass(qF(riso3na, na.exclude = FALSE, keep.attr = FALSE)), unclass(addNA(riso3na))) expect_identical(qF(riso3na, sort = FALSE), factor(riso3na, levels = funique(riso3))) expect_identical(unclass(qF(riso3na, sort = FALSE, na.exclude = FALSE)), unclass(factor(riso3na, levels = funique(riso3na), exclude = NULL))) expect_identical(unclass(qF(riso3na, sort = FALSE, na.exclude = FALSE)), unclass(factor(riso3na, levels = unique(riso3na), exclude = NULL))) }) # Could still refine this code, but is not at all critical !! date <- qG(wlddev$date, return.groups = TRUE) dateg <- GRP(date, call = FALSE) dateg$ordered <- NULL date <- wlddev$date vlabels(date) <- NULL dateg2 <- GRP(date, call = FALSE) dateg2$ordered <- NULL test_that("GRP <> qG and factor <> qG conversions work", { # expect_equal(dateg, dateg2) expect_equal(qF(unattrib(wlddev$country)), as.factor_qG(qG(unattrib(wlddev$country), return.groups = TRUE))) expect_equal(qF(unattrib(wlddev$country)), qF(qG(unattrib(wlddev$country), return.groups = TRUE))) expect_equal(qG(unattrib(wlddev$country)), qG(qF(unattrib(wlddev$country)))) expect_equal(qG(unattrib(wlddev$country), return.groups = TRUE), qG(qF(unattrib(wlddev$country)), return.groups = TRUE)) }) base_group <- function(x, sort = FALSE, group.sizes = FALSE) { if(sort) o <- if(is.list(x)) do.call(order, c(x, list(method = "radix"))) else order(x, method = "radix") if(is.list(x)) x <- do.call(paste, c(x, list(sep = "."))) ux <- unique(if(sort) x[o] else x) r <- match(x, ux) attr(r, "N.groups") <- length(ux) if(group.sizes) attr(r, "group.sizes") <- tabulate(r, length(ux)) if(!sort) oldClass(r) <- c("qG", "na.included") r } test_that("group() works as intended", { wlduo <- wlddev[order(rnorm(nrow(wldNA))), ] dlist <- c(mtcNA, wlddev, wlduo, GGDCNA, airquality) # Single grouping variable expect_identical(lapply(dlist, group, group.sizes = TRUE), lapply(dlist, base_group, group.sizes = TRUE)) # Multiple grouping variables g <- replicate(50, sample.int(11, sample.int(6, 1)), simplify = FALSE) expect_identical(lapply(g, function(i) group(.subset(mtcars, i), group.sizes = TRUE)), lapply(g, function(i) base_group(.subset(mtcars, i), group.sizes = TRUE))) g <- replicate(30, sample.int(13, sample.int(4, 1)), simplify = FALSE) expect_identical(lapply(g, function(i) group(.subset(wlduo, i), group.sizes = TRUE)), lapply(g, function(i) base_group(.subset(wlduo, i), group.sizes = TRUE))) g <- replicate(30, sample.int(13, 3, replace = TRUE), simplify = FALSE) expect_identical(lapply(g, function(i) group(.subset(wlduo, i), group.sizes = TRUE)), lapply(g, function(i) base_group(.subset(wlduo, i), group.sizes = TRUE))) # Positive and negative values give the same grouping nwld <- nv(wlduo) expect_identical(lapply(nwld, group), lapply(nwld %c*% -1, group)) expect_visible(group(nwld %c*% -1)) expect_visible(group(nwld[c(4,2,3)] %c*% -1)) }) GRP2 <- function(x) { g <- GRP.default(x, sort = TRUE, return.groups = FALSE, call = FALSE) r <- g[[2]] attr(r, "N.groups") <- g[[1]] attr(r, "group.sizes") <- g[[3]] r } qG2 <- function(x, method = "auto", sort = TRUE) unclass(qG(x, na.exclude = FALSE, sort = sort, method = method)) test_that("GRP2() and qG2 work as intended", { wlduo <- wlddev[order(rnorm(nrow(wldNA))), ] dlist <- c(mtcNA, wlddev, wlduo, GGDCNA, airquality) # Single grouping variable expect_identical(lapply(dlist, GRP2), lapply(dlist, base_group, sort = TRUE, group.sizes = TRUE)) bgres <- lapply(dlist, base_group, sort = TRUE) expect_identical(lapply(dlist, qG2), bgres) expect_identical(lapply(dlist, qG2, method = "hash"), bgres) expect_identical(lapply(dlist, qG2, method = "radix"), bgres) expect_true(all_identical(qG2(wlduo$country, method = "radix", sort = FALSE), qG2(wlduo$country, method = "hash", sort = FALSE), unclass(base_group(wlduo$country, sort = FALSE)))) # Multiple grouping variables g <- replicate(50, sample.int(11, sample.int(6, 1)), simplify = FALSE) expect_identical(lapply(g, function(i) GRP2(.subset(mtcars, i))), lapply(g, function(i) base_group(.subset(mtcars, i), sort = TRUE, group.sizes = TRUE))) g <- replicate(30, sample.int(13, sample.int(4, 1)), simplify = FALSE) expect_identical(lapply(g, function(i) GRP2(.subset(wlduo, i))), lapply(g, function(i) base_group(.subset(wlduo, i), sort = TRUE, group.sizes = TRUE))) g <- replicate(30, sample.int(13, 3, replace = TRUE), simplify = FALSE) expect_identical(lapply(g, function(i) GRP2(.subset(wlduo, i))), lapply(g, function(i) base_group(.subset(wlduo, i), sort = TRUE, group.sizes = TRUE))) }) test_that("GRP2() works as intended", { wlduo <- wlddev[order(rnorm(nrow(wldNA))), ] dlist <- c(mtcNA, wlddev, wlduo, GGDCNA, airquality) # Single grouping variable expect_identical(lapply(dlist, GRP2), lapply(dlist, base_group, sort = TRUE, group.sizes = TRUE)) # Multiple grouping variables g <- replicate(50, sample.int(11, sample.int(6, 1)), simplify = FALSE) expect_identical(lapply(g, function(i) GRP2(.subset(mtcars, i))), lapply(g, function(i) base_group(.subset(mtcars, i), sort = TRUE, group.sizes = TRUE))) g <- replicate(30, sample.int(13, sample.int(4, 1)), simplify = FALSE) expect_identical(lapply(g, function(i) GRP2(.subset(wlduo, i))), lapply(g, function(i) base_group(.subset(wlduo, i), sort = TRUE, group.sizes = TRUE))) g <- replicate(30, sample.int(13, 3, replace = TRUE), simplify = FALSE) expect_identical(lapply(g, function(i) GRP2(.subset(wlduo, i))), lapply(g, function(i) base_group(.subset(wlduo, i), sort = TRUE, group.sizes = TRUE))) }) if(identical(Sys.getenv("NCRAN"), "TRUE")) { # This is to fool very silly checks on CRAN scanning the code of the tests pwlddev <- eval(parse(text = paste0("plm", ":", ":", "pdata.frame(wlddev, index = c('iso3c', 'year'))"))) iso3c <- eval(parse(text = paste0("plm", ":", ":", "index(pwlddev, 1L)"))) year <- eval(parse(text = paste0("plm", ":", ":", "index(pwlddev, 2L)"))) test_that("GRP pseries and pdata.frame methods work as intended", { expect_equal(GRP(pwlddev, call = FALSE), GRP(iso3c, call = FALSE)) expect_equal(GRP(pwlddev$PCGDP, call = FALSE), GRP(pwlddev, call = FALSE)) expect_equal(GRP(pwlddev, effect = "year", call = FALSE), GRP(year, call = FALSE)) expect_equal(GRP(pwlddev$PCGDP, effect = "year", call = FALSE), GRP(pwlddev, effect = "year", call = FALSE)) }) } fl <- slt(wlddev, region, income) set.seed(101) flNA <- na_insert(fl) test_that("finteraction works as intended", { expect_equal(`oldClass<-`(finteraction(fl), "factor"), base::interaction(fl, drop = TRUE, lex.order = TRUE)) expect_equal(`oldClass<-`(finteraction(ss(fl, 1:300)), "factor"), base::interaction(ss(fl, 1:300), drop = TRUE, lex.order = TRUE)) # missing levels # Missing value behavior is always different !! # expect_equal(`oldClass<-`(finteraction(flNA), "factor"), factor(base::interaction(flNA, drop = TRUE, lex.order = TRUE), exclude = NULL)) # expect_equal(`oldClass<-`(finteraction(ss(flNA, 1:300)), "factor"), base::interaction(ss(flNA, 1:300), drop = TRUE, lex.order = TRUE)) }) wld150 <- ss(wlddev, 1:150) vlabels(wld150) <- NULL set.seed(101) wldNA150 <- na_insert(ss(wlddev, 1:150)) vlabels(wldNA150) <- NULL test_that("fdroplevels works as intended", { expect_identical(fdroplevels(wld150), droplevels(wld150)) expect_identical(fdroplevels(wldNA150), droplevels(wldNA150)) expect_identical(fdroplevels(wld150$iso3c), droplevels(wld150$iso3c)) expect_identical(fdroplevels(wldNA150$iso3c), droplevels(wldNA150$iso3c)) expect_message(fdroplevels(1:3)) expect_warning(fdroplevels(wld150, bla = 1)) expect_warning(fdroplevels(wld150$iso3c, bla = 1)) expect_error(fdroplevels.factor(wld150$country)) }) # Note: Should extend with other than just character data.. rctry <- wlddev$country[order(rnorm(length(wlddev$country)))] set.seed(101) rctryNA <- na_insert(rctry) rdat <- sbt(GGDC10S, order(rnorm(length(Variable))), Variable, Country) vlabels(rdat) <- NULL vlabels(rdat, "format.stata") <- NULL set.seed(101) rdatNA <- na_insert(rdat) test_that("funique works well", { expect_equal(funique(rctry), unique(rctry)) expect_equal(funique(rctry, sort = TRUE), sort(unique(rctry))) expect_equal(funique(rctryNA), unique(rctryNA)) expect_equal(funique(rctryNA, sort = TRUE), c(sort(unique(rctryNA)), NA)) expect_equal(funique(mtcars[.c(cyl, vs, am)]), unique(mtcars[.c(cyl, vs, am)])) expect_equal(funique(mtcNA[.c(cyl, vs, am)]), unique(mtcNA[.c(cyl, vs, am)])) expect_equal(funique(rdat), setRownames(unique(rdat))) expect_equal(funique(rdat, sort = TRUE), roworderv(unique(rdat))) expect_equal(funique(rdatNA), setRownames(unique(rdatNA))) expect_equal(funique(rdatNA, sort = TRUE), roworderv(unique(rdatNA))) }) collapse/tests/testthat/test-fmedian.R0000644000176200001440000012110714167353441017573 0ustar liggesuserscontext("fmedian and fnth") bmean <- base::mean bsum <- base::sum bmin <- base::min bmax <- base::max bmedian <- stats::median # rm(list = ls()) set.seed(101) x <- rnorm(100) w <- as.integer(round(10*abs(rnorm(100)))) # -> Numeric precision issues in R wdat <- as.integer(round(10*abs(rnorm(32)))) xNA <- x wNA <- w xNA[sample.int(100,20)] <- NA wNA[is.na(xNA)] <- NA # only missing weights if x also missing f <- as.factor(sample.int(10, 100, TRUE)) g <- GRP(mtcars, ~ cyl + vs + am) gf <- as_factor_GRP(g) mtcNA <- na_insert(mtcars) mtcNA[27, 1] <- NA # single group NA !! m <- as.matrix(mtcars) mNA <- as.matrix(mtcNA) mNAc <- mNA storage.mode(mNAc) <- "character" nth <- function(x, n, na.rm = FALSE) { if(na.rm) { if(n > 1) n <- (n-1)/(length(x)-1L) x <- na_rm(x) if(!length(x)) return(NA_real_) } else { if(anyNA(x)) return(NA_real_) } if(n < 1) { n <- as.integer((length(x)-1L)*n)+1L if(n < 2L) return(bmin(x)) } sort(x, partial = n)[n] } wnth <- function(x, n = 0.5, w, na.rm = FALSE, ties = "mean") { cc <- complete.cases(x, w) if(na.rm) { x <- x[cc] w <- w[cc] if(!length(x)) return(NA_real_) } else if(!all(cc)) return(NA_real_) sumwh <- bsum(w) * n if(sumwh == 0) return(NA_real_) if(length(x) < 2L) return(x) lp1 <- function(x) if(length(x)) x[length(x)] + 1L else 1L mean2 <- function(x) bsum(x) / length(x) o <- radixorder(x) csumw <- base::cumsum(w[o]) if(csumw[1L] > sumwh) return(x[o[1L]]) switch(ties, mean = mean2(x[o[lp1(which(csumw < sumwh)):lp1(which(csumw <= sumwh))]]), min = x[o[lp1(which(csumw < sumwh))]], max = x[o[lp1(which(csumw <= sumwh))]]) } wmedian <- function(x, w, na.rm = FALSE) wnth(x, 0.5, w, na.rm, "mean") # matrixStats::weightedMedian(x, w, ties = ties) -> doesn't always properly average if ties = "mean"... wBY <- function(x, f, FUN, w, ...) { if(is.atomic(x) && !is.array(x)) return(mapply(FUN, split(x, f), split(w, f), ...)) wspl <- split(w, f) if(is.atomic(x)) return(dapply(x, function(xi) mapply(FUN, split(xi, f), wspl, ...))) qDF(dapply(x, function(xi) mapply(FUN, split(xi, f), wspl, ...), return = "matrix")) } test_that("fmedian performs like base::median", { expect_equal(fmedian(NA), as.double(bmedian(NA))) expect_equal(fmedian(NA, na.rm = FALSE), as.double(bmedian(NA))) expect_equal(fmedian(1), bmedian(1, na.rm = TRUE)) expect_equal(fmedian(1:3), bmedian(1:3, na.rm = TRUE)) expect_equal(fmedian(-1:1), bmedian(-1:1, na.rm = TRUE)) expect_equal(fmedian(1, na.rm = FALSE), bmedian(1)) expect_equal(fmedian(1:3, na.rm = FALSE), bmedian(1:3)) expect_equal(fmedian(-1:1, na.rm = FALSE), bmedian(-1:1)) expect_equal(fmedian(x), bmedian(x, na.rm = TRUE)) expect_equal(fmedian(x, na.rm = FALSE), bmedian(x)) expect_equal(fmedian(xNA, na.rm = FALSE), bmedian(xNA)) expect_equal(fmedian(xNA), bmedian(xNA, na.rm = TRUE)) expect_equal(fmedian(mtcars), fmedian(m)) expect_equal(fmedian(m), dapply(m, bmedian, na.rm = TRUE)) expect_equal(fmedian(m, na.rm = FALSE), dapply(m, bmedian)) expect_equal(fmedian(mNA, na.rm = FALSE), dapply(mNA, bmedian)) expect_equal(fmedian(mNA), dapply(mNA, bmedian, na.rm = TRUE)) expect_equal(fmedian(mtcars), dapply(mtcars, bmedian, na.rm = TRUE)) expect_equal(fmedian(mtcars, na.rm = FALSE), dapply(mtcars, bmedian)) expect_equal(fmedian(mtcNA, na.rm = FALSE), dapply(mtcNA, bmedian)) expect_equal(fmedian(mtcNA), dapply(mtcNA, bmedian, na.rm = TRUE)) expect_equal(fmedian(x, f), BY(x, f, bmedian, na.rm = TRUE)) expect_equal(fmedian(x, f, na.rm = FALSE), BY(x, f, bmedian)) expect_equal(fmedian(xNA, f, na.rm = FALSE), BY(xNA, f, bmedian)) expect_equal(fmedian(xNA, f), BY(xNA, f, bmedian, na.rm = TRUE)) expect_equal(fmedian(m, g), BY(m, g, bmedian, na.rm = TRUE)) expect_equal(fmedian(m, g, na.rm = FALSE), BY(m, g, bmedian)) expect_equal(fmedian(mNA, g, na.rm = FALSE), BY(mNA, g, bmedian)) expect_equal(fmedian(mNA, g), BY(mNA, g, bmedian, na.rm = TRUE)) expect_equal(fmedian(mtcars, g), BY(mtcars, g, bmedian, na.rm = TRUE)) expect_equal(fmedian(mtcars, g, na.rm = FALSE), BY(mtcars, g, bmedian)) expect_equal(fmedian(mtcNA, g, na.rm = FALSE), BY(mtcNA, g, bmedian)) expect_equal(fmedian(mtcNA, g), BY(mtcNA, g, bmedian, na.rm = TRUE)) }) test_that("fmedian performs like fmedian with weights all equal", { expect_equal(fmedian(NA), fmedian(NA, w = 1)) expect_equal(fmedian(NA, na.rm = FALSE), fmedian(NA, w = 1, na.rm = FALSE)) expect_equal(fmedian(1), fmedian(1, w = 3)) expect_equal(fmedian(1:3), fmedian(1:3, w = rep(1,3))) expect_equal(fmedian(-1:1), fmedian(-1:1, w = rep(4.2,3))) expect_equal(fmedian(1, na.rm = FALSE), fmedian(1, w = 5, na.rm = FALSE)) expect_equal(fmedian(1:3, na.rm = FALSE), fmedian(1:3, w = rep(1, 3), na.rm = FALSE)) expect_equal(fmedian(-1:1, na.rm = FALSE), fmedian(-1:1, w = rep(12, 3), na.rm = FALSE)) expect_equal(fmedian(x), fmedian(x, w = rep(1,100))) expect_equal(fmedian(x, na.rm = FALSE), fmedian(x, w = rep(1, 100), na.rm = FALSE)) expect_equal(fmedian(xNA, na.rm = FALSE), fmedian(xNA, w = rep(5, 100), na.rm = FALSE)) expect_equal(fmedian(xNA), fmedian(xNA, w = rep(4, 100))) expect_equal(fmedian(m), fmedian(m, w = rep(6587, 32))) expect_equal(fmedian(m, na.rm = FALSE), fmedian(m, w = rep(6587, 32), na.rm = FALSE)) expect_equal(fmedian(mNA, na.rm = FALSE), fmedian(mNA, w = rep(6587, 32), na.rm = FALSE)) expect_equal(fmedian(mNA), fmedian(mNA, w = rep(6587, 32))) expect_equal(fmedian(mtcars), fmedian(mtcars, w = rep(6787, 32))) expect_equal(fmedian(mtcars, na.rm = FALSE), fmedian(mtcars, w = rep(6787, 32), na.rm = FALSE)) expect_equal(fmedian(mtcNA, na.rm = FALSE), fmedian(mtcNA, w = rep(6787, 32), na.rm = FALSE)) expect_equal(fmedian(mtcNA), fmedian(mtcNA, w = rep(6787, 32))) expect_equal(fmedian(x, f), fmedian(x, f, rep(547,100))) expect_equal(fmedian(x, f, na.rm = FALSE), fmedian(x, f, rep(6, 100), na.rm = FALSE)) expect_equal(fmedian(xNA, f, na.rm = FALSE), fmedian(xNA, f, rep(52,100), na.rm = FALSE)) expect_equal(fmedian(xNA, f), fmedian(xNA, f, rep(5997456,100))) expect_equal(fmedian(m, g), fmedian(m, g, rep(546,32))) expect_equal(fmedian(m, g, na.rm = FALSE), fmedian(m, g, rep(1,32), na.rm = FALSE)) expect_equal(fmedian(mNA, g, na.rm = FALSE), fmedian(mNA, g, rep(5,32), na.rm = FALSE)) expect_equal(fmedian(mNA, g), fmedian(mNA, g, rep(1,32))) expect_equal(fmedian(mtcars, g), fmedian(mtcars, g, rep(53,32))) expect_equal(fmedian(mtcars, g, na.rm = FALSE), fmedian(mtcars, g, rep(546,32), na.rm = FALSE)) expect_equal(fmedian(mtcNA, g, na.rm = FALSE), fmedian(mtcNA, g, rep(1,32), na.rm = FALSE)) expect_equal(fmedian(mtcNA, g), fmedian(mtcNA, g, rep(999,32))) }) test_that("fmedian with weights performs like wmedian (defined above)", { # complete weights expect_equal(fmedian(NA, w = 1), wmedian(NA_real_, 1)) expect_equal(fmedian(NA, w = 1, na.rm = FALSE), wmedian(NA_real_, 1)) expect_equal(fmedian(1, w = 1), wmedian(1, w = 1)) expect_equal(fmedian(1:3, w = 1:3), wmedian(1:3, 1:3)) expect_equal(fmedian(-1:1, w = 1:3), wmedian(-1:1, 1:3)) expect_equal(fmedian(1, w = 1, na.rm = FALSE), wmedian(1, 1)) expect_equal(fmedian(1:3, w = c(0.99,3454,1.111), na.rm = FALSE), wmedian(1:3, c(0.99,3454,1.111))) expect_equal(fmedian(-1:1, w = 1:3, na.rm = FALSE), wmedian(-1:1, 1:3)) expect_equal(fmedian(x, w = w), wmedian(x, w)) expect_equal(fmedian(x, w = w, na.rm = FALSE), wmedian(x, w)) expect_equal(fmedian(xNA, w = w, na.rm = FALSE), wmedian(xNA, w)) expect_equal(fmedian(xNA, w = w), wmedian(xNA, w, na.rm = TRUE)) expect_equal(fmedian(mtcars, w = wdat), fmedian(m, w = wdat)) expect_equal(fmedian(m, w = wdat), dapply(m, wmedian, wdat, na.rm = TRUE)) expect_equal(fmedian(m, w = wdat, na.rm = FALSE), dapply(m, wmedian, wdat)) expect_equal(fmedian(mNA, w = wdat, na.rm = FALSE), dapply(mNA, wmedian, wdat)) expect_equal(fmedian(mNA, w = wdat), dapply(mNA, wmedian, wdat, na.rm = TRUE)) expect_equal(fmedian(mtcars, w = wdat), dapply(mtcars, wmedian, wdat, na.rm = TRUE)) expect_equal(fmedian(mtcars, w = wdat, na.rm = FALSE), dapply(mtcars, wmedian, wdat)) expect_equal(fmedian(mtcNA, w = wdat, na.rm = FALSE), dapply(mtcNA, wmedian, wdat)) expect_equal(fmedian(mtcNA, w = wdat), dapply(mtcNA, wmedian, wdat, na.rm = TRUE)) expect_equal(fmedian(x, f, w), wBY(x, f, wmedian, w)) expect_equal(fmedian(x, f, w, na.rm = FALSE), wBY(x, f, wmedian, w)) expect_equal(fmedian(xNA, f, w, na.rm = FALSE), wBY(xNA, f, wmedian, w)) expect_equal(fmedian(xNA, f, w), wBY(xNA, f, wmedian, w, na.rm = TRUE)) expect_equal(fmedian(m, g, wdat), wBY(m, gf, wmedian, wdat)) expect_equal(fmedian(m, g, wdat, na.rm = FALSE), wBY(m, gf, wmedian, wdat)) expect_equal(fmedian(mNA, g, wdat, na.rm = FALSE), wBY(mNA, gf, wmedian, wdat)) expect_equal(fmedian(mNA, g, wdat), wBY(mNA, gf, wmedian, wdat, na.rm = TRUE)) expect_equal(fmedian(mtcars, g, wdat), wBY(mtcars, gf, wmedian, wdat)) expect_equal(fmedian(mtcars, g, wdat, na.rm = FALSE), wBY(mtcars, gf, wmedian, wdat)) expect_equal(fmedian(mtcNA, g, wdat, na.rm = FALSE), wBY(mtcNA, gf, wmedian, wdat)) expect_equal(fmedian(mtcNA, g, wdat), wBY(mtcNA, gf, wmedian, wdat, na.rm = TRUE)) # missing weights: Only supported if x is also missing... expect_equal(fmedian(NA, w = NA), wmedian(NA_real_, NA_real_)) expect_equal(fmedian(NA, w = NA, na.rm = FALSE), wmedian(NA_real_, NA_real_)) expect_equal(fmedian(xNA, w = wNA, na.rm = FALSE), wmedian(xNA, wNA)) expect_equal(fmedian(xNA, w = wNA), wmedian(xNA, wNA, na.rm = TRUE)) expect_equal(fmedian(xNA, f, wNA, na.rm = FALSE), wBY(xNA, f, wmedian, wNA)) expect_equal(fmedian(xNA, f, wNA), wBY(xNA, f, wmedian, wNA, na.rm = TRUE)) }) test_that("fmedian performs numerically stable", { expect_true(all_obj_equal(replicate(50, fmedian(1), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(NA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(NA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(x), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(x, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(xNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(xNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(m), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(m, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(mNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(mNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(mtcars), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(mtcars, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(mtcNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(mtcNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(x, f), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(x, f, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(xNA, f, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(xNA, f), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(m, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(m, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(mNA, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(mNA, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(mtcars, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(mtcars, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(mtcNA, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(mtcNA, g), simplify = FALSE))) }) test_that("fmedian with complete weights performs numerically stable", { expect_true(all_obj_equal(replicate(50, fmedian(1, w = 1), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(NA, w = 1), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(NA, w = 1, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(x, w = w), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(x, w = w, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(xNA, w = w, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(xNA, w = w), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(m, w = wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(m, w = wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(mNA, w = wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(mNA, w = wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(mtcars, w = wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(mtcars, w = wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(mtcNA, w = wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(mtcNA, w = wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(x, f, w), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(x, f, w, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(xNA, f, w, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(xNA, f, w), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(m, g, wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(m, g, wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(mNA, g, wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(mNA, g, wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(mtcars, g, wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(mtcars, g, wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(mtcNA, g, wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(mtcNA, g, wdat), simplify = FALSE))) }) test_that("fmedian with missing weights performs numerically stable", { expect_true(all_obj_equal(replicate(50, fmedian(NA, w = NA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(NA, w = NA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(xNA, w = wNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(xNA, w = wNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(xNA, f, wNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(xNA, f, wNA), simplify = FALSE))) }) test_that("fmedian handles special values in the right way", { expect_equal(fmedian(NA), NA_real_) expect_equal(fmedian(NaN), NaN) expect_equal(fmedian(Inf), Inf) expect_equal(fmedian(-Inf), -Inf) expect_equal(fmedian(TRUE), 1) expect_equal(fmedian(FALSE), 0) expect_equal(fmedian(NA, na.rm = FALSE), NA_real_) expect_equal(fmedian(NaN, na.rm = FALSE), NaN) expect_equal(fmedian(Inf, na.rm = FALSE), Inf) expect_equal(fmedian(-Inf, na.rm = FALSE), -Inf) expect_equal(fmedian(TRUE, na.rm = FALSE), 1) expect_equal(fmedian(FALSE, na.rm = FALSE), 0) expect_equal(fmedian(c(1,NA)), 1) expect_equal(fmedian(c(1,NaN)), 1) expect_equal(fmedian(c(1,Inf)), Inf) expect_equal(fmedian(c(1,-Inf)), -Inf) expect_equal(fmedian(c(FALSE,TRUE)), 0.5) expect_equal(fmedian(c(FALSE,FALSE)), 0) expect_equal(fmedian(c(1,Inf), na.rm = FALSE), Inf) expect_equal(fmedian(c(1,-Inf), na.rm = FALSE), -Inf) expect_equal(fmedian(c(FALSE,TRUE), na.rm = FALSE), 0.5) expect_equal(fmedian(c(FALSE,FALSE), na.rm = FALSE), 0) }) test_that("fmedian with weights handles special values in the right way", { expect_equal(fmedian(NA, w = 1), NA_real_) expect_equal(fmedian(NaN, w = 1), NaN) expect_equal(fmedian(Inf, w = 1), Inf) expect_equal(fmedian(-Inf, w = 1), -Inf) expect_equal(fmedian(TRUE, w = 1), 1) expect_equal(fmedian(FALSE, w = 1), 0) expect_equal(fmedian(NA, w = 1, na.rm = FALSE), NA_real_) expect_equal(fmedian(NaN, w = 1, na.rm = FALSE), NaN) expect_equal(fmedian(Inf, w = 1, na.rm = FALSE), Inf) expect_equal(fmedian(-Inf, w = 1, na.rm = FALSE), -Inf) expect_equal(fmedian(TRUE, w = 1, na.rm = FALSE), 1) expect_equal(fmedian(FALSE, w = 1, na.rm = FALSE), 0) expect_equal(fmedian(NA, w = NA), NA_real_) expect_equal(fmedian(NaN, w = NA), NA_real_) expect_error(fmedian(Inf, w = NA)) expect_error(fmedian(-Inf, w = NA)) expect_error(fmedian(TRUE, w = NA)) expect_error(fmedian(FALSE, w = NA)) expect_equal(fmedian(NA, w = NA, na.rm = FALSE), NA_real_) expect_equal(fmedian(NaN, w = NA, na.rm = FALSE), NA_real_) expect_error(fmedian(Inf, w = NA, na.rm = FALSE)) expect_error(fmedian(-Inf, w = NA, na.rm = FALSE)) expect_error(fmedian(TRUE, w = NA, na.rm = FALSE)) expect_error(fmedian(FALSE, w = NA, na.rm = FALSE)) # expect_equal(fmedian(1:3, w = c(1,Inf,3)), 2) # wmedian gives 2 !!!!!! # expect_equal(fmedian(1:3, w = c(1,-Inf,3)), 1) # wmedian gives 3 !!!!!! # expect_equal(fmedian(1:3, w = c(1,Inf,3), na.rm = FALSE), 2) # expect_equal(fmedian(1:3, w = c(1,-Inf,3), na.rm = FALSE), 3) }) test_that("fmedian produces errors for wrong input", { expect_error(fmedian("a")) expect_error(fmedian(NA_character_)) expect_error(fmedian(mNAc)) expect_error(fmedian(mNAc, f)) expect_error(fmedian(1:2,1:3)) expect_error(fmedian(m,1:31)) expect_error(fmedian(mtcars,1:31)) expect_error(fmedian(mtcars, w = 1:31)) expect_error(fmedian("a", w = 1)) expect_error(fmedian(1:2, w = 1:3)) expect_error(fmedian(NA_character_, w = 1)) expect_error(fmedian(mNAc, w = wdat)) expect_error(fmedian(mNAc, f, wdat)) expect_error(fmedian(mNA, w = 1:33)) expect_error(fmedian(1:2,1:2, 1:3)) expect_error(fmedian(m,1:32,1:20)) expect_error(fmedian(mtcars,1:32,1:10)) expect_error(fmedian(1:2, w = c("a","b"))) expect_error(fmedian(wlddev)) expect_error(fmedian(wlddev, w = wlddev$year)) expect_error(fmedian(wlddev, wlddev$iso3c)) expect_error(fmedian(wlddev, wlddev$iso3c, wlddev$year)) }) # fnth g <- GRP(mtcars, ~ cyl) gf <- as_factor_GRP(g) test_that("fnth gives a proper lower/upper/average weighted median on complete data", { expect_identical(fnth(1:3, w = c(3,1,1)), 1) expect_true(all_identical( fnth(1:3, w = c(3,1,1)), fnth(1:3, w = c(3,1,1), ties = "min"), fnth(1:3, w = c(3,1,1), ties = "max"), fnth(1:3, w = c(3,1,1), na.rm = FALSE), fnth(1:3, w = c(3,1,1), ties = "min", na.rm = FALSE), fnth(1:3, w = c(3,1,1), ties = "max", na.rm = FALSE), fnth(1:3, g = rep(1,3), w = c(3,1,1), use.g.names = FALSE), fnth(1:3, g = rep(1,3), w = c(3,1,1), use.g.names = FALSE, ties = "min"), fnth(1:3, g = rep(1,3), w = c(3,1,1), use.g.names = FALSE, ties = "max"), fnth(1:3, g = rep(1,3), w = c(3,1,1), use.g.names = FALSE, na.rm = FALSE), fnth(1:3, g = rep(1,3), w = c(3,1,1), use.g.names = FALSE, ties = "min", na.rm = FALSE), fnth(1:3, g = rep(1,3), w = c(3,1,1), use.g.names = FALSE, ties = "max", na.rm = FALSE))) expect_identical(fnth(1:3, w = c(1,1,3)), 3) expect_true(all_identical( fnth(1:3, w = c(1,1,3)), fnth(1:3, w = c(1,1,3), ties = "min"), fnth(1:3, w = c(1,1,3), ties = "max"), fnth(1:3, w = c(1,1,3), na.rm = FALSE), fnth(1:3, w = c(1,1,3), ties = "min", na.rm = FALSE), fnth(1:3, w = c(1,1,3), ties = "max", na.rm = FALSE), fnth(1:3, g = rep(1,3), w = c(1,1,3), use.g.names = FALSE), fnth(1:3, g = rep(1,3), w = c(1,1,3), use.g.names = FALSE, ties = "min"), fnth(1:3, g = rep(1,3), w = c(1,1,3), use.g.names = FALSE, ties = "max"), fnth(1:3, g = rep(1,3), w = c(1,1,3), use.g.names = FALSE, na.rm = FALSE), fnth(1:3, g = rep(1,3), w = c(1,1,3), use.g.names = FALSE, ties = "min", na.rm = FALSE), fnth(1:3, g = rep(1,3), w = c(1,1,3), use.g.names = FALSE, ties = "max", na.rm = FALSE))) w = c(0.15, 0.1, 0.2, 0.3, 0.25) y = seq_len(5) # [order(rnorm(5))] expect_identical(fnth(y, w = w), 4) expect_true(all_identical(4, fnth(y, g = rep(1, length(y)), w = w, use.g.names = FALSE), fnth(y, w = w, ties = "min"), fnth(y, g = rep(1, length(y)), w = w, use.g.names = FALSE, ties = "min"), fnth(y, w = w, ties = "max"), fnth(y, g = rep(1, length(y)), w = w, use.g.names = FALSE, ties = "max"), fnth(y, w = w, na.rm = FALSE), fnth(y, g = rep(1, length(y)), w = w, use.g.names = FALSE, na.rm = FALSE), fnth(y, w = w, ties = "min", na.rm = FALSE), fnth(y, g = rep(1, length(y)), w = w, use.g.names = FALSE, ties = "min", na.rm = FALSE), fnth(y, w = w, ties = "max", na.rm = FALSE), fnth(y, g = rep(1, length(y)), w = w, use.g.names = FALSE, ties = "max", na.rm = FALSE))) w = c(0.15, 0.2, 0.3, 0.25) y = seq_len(4) # [order(rnorm(4))] expect_identical(fnth(y, w = w), 3) expect_true(all_identical(3, fnth(y, g = rep(1, length(y)), w = w, use.g.names = FALSE), fnth(y, w = w, ties = "min"), fnth(y, g = rep(1, length(y)), w = w, use.g.names = FALSE, ties = "min"), fnth(y, w = w, ties = "max"), fnth(y, g = rep(1, length(y)), w = w, use.g.names = FALSE, ties = "max"), fnth(y, w = w, na.rm = FALSE), fnth(y, g = rep(1, length(y)), w = w, use.g.names = FALSE, na.rm = FALSE), fnth(y, w = w, ties = "min", na.rm = FALSE), fnth(y, g = rep(1, length(y)), w = w, use.g.names = FALSE, ties = "min", na.rm = FALSE), fnth(y, w = w, ties = "max", na.rm = FALSE), fnth(y, g = rep(1, length(y)), w = w, use.g.names = FALSE, ties = "max", na.rm = FALSE))) w = rep(0.25, 4) expect_identical(fnth(y, w = w), 2.5) expect_identical(2.5, fnth(y, g = rep(1, length(y)), w = w, use.g.names = FALSE)) expect_identical(fnth(y, w = w, ties = "min"), 2) expect_identical(2, fnth(y, g = rep(1, length(y)), w = w, use.g.names = FALSE, ties = "min")) expect_identical(fnth(y, w = w, ties = "max"), 3) expect_identical(3, fnth(y, g = rep(1, length(y)), w = w, use.g.names = FALSE, ties = "max")) expect_identical(fnth(y, w = w, na.rm = FALSE), 2.5) expect_identical(2.5, fnth(y, g = rep(1, length(y)), w = w, use.g.names = FALSE, na.rm = FALSE)) expect_identical(fnth(y, w = w, ties = "min", na.rm = FALSE), 2) expect_identical(2, fnth(y, g = rep(1, length(y)), w = w, use.g.names = FALSE, ties = "min", na.rm = FALSE)) expect_identical(fnth(y, w = w, ties = "max", na.rm = FALSE), 3) expect_identical(3, fnth(y, g = rep(1, length(y)), w = w, use.g.names = FALSE, ties = "max", na.rm = FALSE)) w = rep(0.25, 5) y = seq_len(5) #[order(rnorm(5))] expect_identical(fnth(y, w = w), 3) expect_true(all_identical(3, fnth(y, g = rep(1, length(y)), w = w, use.g.names = FALSE), fnth(y, w = w, ties = "min"), fnth(y, g = rep(1, length(y)), w = w, use.g.names = FALSE, ties = "min"), fnth(y, w = w, ties = "max"), fnth(y, g = rep(1, length(y)), w = w, use.g.names = FALSE, ties = "max"), fnth(y, w = w, na.rm = FALSE), fnth(y, g = rep(1, length(y)), w = w, use.g.names = FALSE, na.rm = FALSE), fnth(y, w = w, ties = "min", na.rm = FALSE), fnth(y, g = rep(1, length(y)), w = w, use.g.names = FALSE, ties = "min", na.rm = FALSE), fnth(y, w = w, ties = "max", na.rm = FALSE), fnth(y, g = rep(1, length(y)), w = w, use.g.names = FALSE, ties = "max", na.rm = FALSE))) w = c(0.25, 0.25, 0, 0.25, 0.25) expect_identical(fnth(y, w = w), 3) expect_identical(3, fnth(y, g = rep(1, length(y)), w = w, use.g.names = FALSE)) expect_identical(fnth(y, w = w, ties = "min"), 2) expect_identical(2, fnth(y, g = rep(1, length(y)), w = w, use.g.names = FALSE, ties = "min")) expect_identical(fnth(y, w = w, ties = "max"), 4) expect_identical(4, fnth(y, g = rep(1, length(y)), w = w, use.g.names = FALSE, ties = "max")) expect_identical(fnth(y, w = w, na.rm = FALSE), 3) expect_identical(3, fnth(y, g = rep(1, length(y)), w = w, use.g.names = FALSE, na.rm = FALSE)) expect_identical(fnth(y, w = w, ties = "min", na.rm = FALSE), 2) expect_identical(2, fnth(y, g = rep(1, length(y)), w = w, use.g.names = FALSE, ties = "min", na.rm = FALSE)) expect_identical(fnth(y, w = w, ties = "max", na.rm = FALSE), 4) expect_identical(4, fnth(y, g = rep(1, length(y)), w = w, use.g.names = FALSE, ties = "max", na.rm = FALSE)) w = c(0.25, 0.25, 0, 0, 0.25, 0.25) y = seq_len(6) # [order(rnorm(6))] expect_identical(fnth(y, w = w), 3.5) expect_identical(3.5, fnth(y, g = rep(1, length(y)), w = w, use.g.names = FALSE)) expect_identical(fnth(y, w = w, ties = "min"), 2) expect_identical(2, fnth(y, g = rep(1, length(y)), w = w, use.g.names = FALSE, ties = "min")) expect_identical(fnth(y, w = w, ties = "max"), 5) expect_identical(5, fnth(y, g = rep(1, length(y)), w = w, use.g.names = FALSE, ties = "max")) expect_identical(fnth(y, w = w, na.rm = FALSE), 3.5) expect_identical(3.5, fnth(y, g = rep(1, length(y)), w = w, use.g.names = FALSE, na.rm = FALSE)) expect_identical(fnth(y, w = w, ties = "min", na.rm = FALSE), 2) expect_identical(2, fnth(y, g = rep(1, length(y)), w = w, use.g.names = FALSE, ties = "min", na.rm = FALSE)) expect_identical(fnth(y, w = w, ties = "max", na.rm = FALSE), 5) expect_identical(5, fnth(y, g = rep(1, length(y)), w = w, use.g.names = FALSE, ties = "max", na.rm = FALSE)) }) test_that("fnth performs like nth (defined above)", { n = 2 expect_error(fnth(NA, n)) expect_error(fnth(NA, n, na.rm = FALSE)) expect_error(fnth(1, n)) expect_equal(fnth(1:3, n), nth(1:3, n, na.rm = TRUE)) expect_equal(fnth(-1:1, n), nth(-1:1, n, na.rm = TRUE)) expect_equal(fnth(1:3, n, na.rm = FALSE), nth(1:3, n)) expect_equal(fnth(-1:1, n, na.rm = FALSE), nth(-1:1, n)) expect_equal(fnth(x, n), nth(x, n, na.rm = TRUE)) expect_equal(fnth(x, n, na.rm = FALSE), nth(x, n)) expect_equal(fnth(xNA, n, na.rm = FALSE), nth(xNA, n)) expect_equal(fnth(xNA, n), nth(xNA, n, na.rm = TRUE)) expect_equal(fnth(mtcars, n), fnth(m, n)) # expect_equal(fnth(m, n), dapply(m, nth, n, na.rm = TRUE)) # failed on oldrel-windows-ix86+x86_64 # expect_equal(fnth(m, n, na.rm = FALSE), dapply(m, nth, n)) # failed on oldrel-windows-ix86+x86_64 expect_equal(fnth(mNA, n, na.rm = FALSE), dapply(mNA, nth, n)) expect_equal(fnth(mNA, n), dapply(mNA, nth, n, na.rm = TRUE)) # expect_equal(fnth(mtcars, n), dapply(mtcars, nth, n, na.rm = TRUE)) # failed on oldrel-windows-ix86+x86_64 # expect_equal(fnth(mtcars, n, na.rm = FALSE), dapply(mtcars, nth, n)) # failed on oldrel-windows-ix86+x86_64 expect_equal(fnth(mtcNA, n, na.rm = FALSE), dapply(mtcNA, nth, n)) expect_equal(fnth(mtcNA, n), dapply(mtcNA, nth, n, na.rm = TRUE)) f2 <- as.factor(rep(1:10, each = 10)[order(rnorm(100))]) # expect_equal(fnth(x, n, f2), BY(x, f2, nth, n, na.rm = TRUE)) # failed on oldrel-windows-ix86+x86_64 # expect_equal(fnth(x, n, f2, na.rm = FALSE), BY(x, f2, nth, n)) # failed on oldrel-windows-ix86+x86_64 g2 <- GRP(rep(1:2, each = 16)[order(rnorm(32))]) # expect_equal(fnth(m, n, g2), BY(m, g2, nth, n, na.rm = TRUE)) # failed on oldrel-windows-ix86+x86_64 # expect_equal(fnth(m, n, g2, na.rm = FALSE), BY(m, g2, nth, n)) # failed on oldrel-windows-ix86+x86_64 # expect_equal(fnth(mtcars, n, g2), BY(mtcars, g2, nth, n, na.rm = TRUE)) # failed on oldrel-windows-ix86+x86_64 # expect_equal(fnth(mtcars, n, g2, na.rm = FALSE), BY(mtcars, g2, nth, n)) # failed on oldrel-windows-ix86+x86_64 for(i in 1:5) { n = runif(1, min = 1, max = 999) / 1000 # Probability needed for nth to work with groups expect_equal(fnth(1:3, n, ties = "min"), nth(1:3, n, na.rm = TRUE)) expect_equal(fnth(-1:1, n, ties = "min"), nth(-1:1, n, na.rm = TRUE)) expect_equal(fnth(1:3, n, na.rm = FALSE, ties = "min"), nth(1:3, n)) expect_equal(fnth(-1:1, n, na.rm = FALSE, ties = "min"), nth(-1:1, n)) expect_equal(fnth(x, n, ties = "min"), nth(x, n, na.rm = TRUE)) expect_equal(fnth(x, n, na.rm = FALSE, ties = "min"), nth(x, n)) expect_equal(fnth(xNA, n, na.rm = FALSE, ties = "min"), nth(xNA, n)) expect_equal(fnth(xNA, n, ties = "min"), nth(xNA, n, na.rm = TRUE)) expect_equal(fnth(mtcars, n, ties = "min"), fnth(m, n, ties = "min")) expect_equal(fnth(m, n, ties = "min"), dapply(m, nth, n, na.rm = TRUE)) expect_equal(fnth(m, n, na.rm = FALSE, ties = "min"), dapply(m, nth, n)) expect_equal(fnth(mNA, n, na.rm = FALSE, ties = "min"), dapply(mNA, nth, n)) expect_equal(fnth(mNA, n, ties = "min"), dapply(mNA, nth, n, na.rm = TRUE)) expect_equal(fnth(mtcars, n, ties = "min"), dapply(mtcars, nth, n, na.rm = TRUE)) expect_equal(fnth(mtcars, n, na.rm = FALSE, ties = "min"), dapply(mtcars, nth, n)) expect_equal(fnth(mtcNA, n, na.rm = FALSE, ties = "min"), dapply(mtcNA, nth, n)) expect_equal(fnth(mtcNA, n, ties = "min"), dapply(mtcNA, nth, n, na.rm = TRUE)) expect_equal(fnth(xNA, n, f2, na.rm = FALSE, ties = "min"), BY(xNA, f2, nth, n)) expect_equal(fnth(xNA, n, f2, ties = "min"), BY(xNA, f2, nth, n, na.rm = TRUE)) expect_equal(fnth(m, n, g, ties = "min"), BY(m, g, nth, n, na.rm = TRUE)) expect_equal(fnth(m, n, g, na.rm = FALSE, ties = "min"), BY(m, g, nth, n)) expect_equal(fnth(mNA, n, g, na.rm = FALSE, ties = "min"), BY(mNA, g, nth, n)) expect_equal(fnth(mNA, n, g, ties = "min"), BY(mNA, g, nth, n, na.rm = TRUE)) expect_equal(fnth(mtcars, n, g, ties = "min"), BY(mtcars, g, nth, n, na.rm = TRUE)) expect_equal(fnth(mtcars, n, g, na.rm = FALSE, ties = "min"), BY(mtcars, g, nth, n)) expect_equal(fnth(mtcNA, n, g, na.rm = FALSE, ties = "min"), BY(mtcNA, g, nth, n)) expect_equal(fnth(mtcNA, n, g, ties = "min"), BY(mtcNA, g, nth, n, na.rm = TRUE)) } }) test_that("fnth matrix and data.frame method work alike", { for(i in 1:3) { n = runif(1, min = 1, max = 999) / 1000 expect_equal(fnth(mtcars, n, ties = "min"), fnth(m, n, ties = "min")) expect_equal(fnth(mtcars, n), fnth(m, n)) expect_equal(fnth(mtcars, n, ties = "max"), fnth(m, n, ties = "max")) expect_equal(fnth(mtcNA, n, ties = "min"), fnth(mNA, n, ties = "min")) expect_equal(fnth(mtcNA, n), fnth(mNA, n)) expect_equal(fnth(mtcNA, n, ties = "max"), fnth(mNA, n, ties = "max")) expect_equal(qM(fnth(mtcars, n, g, ties = "min")), fnth(m, n, g, ties = "min")) expect_equal(qM(fnth(mtcars, n, g)), fnth(m, n, g)) expect_equal(qM(fnth(mtcars, n, g, ties = "max")), fnth(m, n, g, ties = "max")) expect_equal(qM(fnth(mtcNA, n, g, ties = "min")), fnth(mNA, n, g, ties = "min")) expect_equal(qM(fnth(mtcNA, n, g)), fnth(mNA, n, g)) expect_equal(qM(fnth(mtcNA, n, g, ties = "max")), fnth(mNA, n, g, ties = "max")) expect_equal(fnth(mtcars, n, w = wdat, ties = "min"), fnth(m, n, w = wdat, ties = "min")) expect_equal(fnth(mtcars, n, w = wdat), fnth(m, n, w = wdat)) expect_equal(fnth(mtcars, n, w = wdat, ties = "max"), fnth(m, n, w = wdat, ties = "max")) expect_equal(fnth(mtcNA, n, w = wdat, ties = "min"), fnth(mNA, n, w = wdat, ties = "min")) expect_equal(fnth(mtcNA, n, w = wdat), fnth(mNA, n, w = wdat)) expect_equal(fnth(mtcNA, n, w = wdat, ties = "max"), fnth(mNA, n, w = wdat, ties = "max")) expect_equal(qM(fnth(mtcars, n, g, wdat, ties = "min")), fnth(m, n, g, wdat, ties = "min")) expect_equal(qM(fnth(mtcars, n, g, wdat)), fnth(m, n, g, wdat)) expect_equal(qM(fnth(mtcars, n, g, wdat, ties = "max")), fnth(m, n, g, wdat, ties = "max")) expect_equal(qM(fnth(mtcNA, n, g, wdat, ties = "min")), fnth(mNA, n, g, wdat, ties = "min")) expect_equal(qM(fnth(mtcNA, n, g, wdat)), fnth(mNA, n, g, wdat)) expect_equal(qM(fnth(mtcNA, n, g, wdat, ties = "max")), fnth(mNA, n, g, wdat, ties = "max")) } }) test_that("fnth performs like fnth with weights all equal", { for(t in c("min","max")) { # "mean", # already tested above.. # for(i in 1:3) { n = 0.5 # round(runif(1, min = 1, max = 999) / 1000, 3) # other numbers than 0.5 do not work and cannot work.. expect_equal(fnth(NA, n, ties = t), fnth(NA, n, w = 1, ties = t)) expect_equal(fnth(NA, n, na.rm = FALSE, ties = t), fnth(NA, n, w = 1, na.rm = FALSE, ties = t)) expect_equal(fnth(1, n, ties = t), fnth(1, n, w = 3, ties = t)) expect_equal(fnth(1:3, n, ties = t), fnth(1:3, n, w = rep(1,3), ties = t)) expect_equal(fnth(-1:1, n, ties = t), fnth(-1:1, n, w = rep(4.2,3), ties = t)) expect_equal(fnth(1, n, na.rm = FALSE, ties = t), fnth(1, n, w = 5, na.rm = FALSE, ties = t)) expect_equal(fnth(1:3, n, na.rm = FALSE, ties = t), fnth(1:3, n, w = rep(1, 3), na.rm = FALSE, ties = t)) expect_equal(fnth(-1:1, n, na.rm = FALSE, ties = t), fnth(-1:1, n, w = rep(12, 3), na.rm = FALSE, ties = t)) expect_equal(fnth(x, n, ties = t), fnth(x, n, w = rep(1,100), ties = t)) expect_equal(fnth(x, n, na.rm = FALSE, ties = t), fnth(x, n, w = rep(1, 100), na.rm = FALSE, ties = t)) expect_equal(fnth(xNA, n, na.rm = FALSE, ties = t), fnth(xNA, n, w = rep(5, 100), na.rm = FALSE, ties = t)) expect_equal(fnth(xNA, n, ties = t), fnth(xNA, n, w = rep(4, 100), ties = t)) expect_equal(fnth(m, n, ties = t), fnth(m, n, w = rep(6587, 32), ties = t)) expect_equal(fnth(m, n, na.rm = FALSE, ties = t), fnth(m, n, w = rep(6587, 32), na.rm = FALSE, ties = t)) expect_equal(fnth(mNA, n, na.rm = FALSE, ties = t), fnth(mNA, n, w = rep(6587, 32), na.rm = FALSE, ties = t)) expect_equal(fnth(mNA, n, ties = t), fnth(mNA, n, w = rep(6587, 32), ties = t)) expect_equal(fnth(mtcars, n, ties = t), fnth(mtcars, n, w = rep(6787, 32), ties = t)) expect_equal(fnth(mtcars, n, na.rm = FALSE, ties = t), fnth(mtcars, n, w = rep(6787, 32), na.rm = FALSE, ties = t)) expect_equal(fnth(mtcNA, n, na.rm = FALSE, ties = t), fnth(mtcNA, n, w = rep(6787, 32), na.rm = FALSE, ties = t)) expect_equal(fnth(mtcNA, n, ties = t), fnth(mtcNA, n, w = rep(6787, 32), ties = t)) expect_equal(fnth(x, n, f, ties = t), fnth(x, n, f, rep(547,100), ties = t)) expect_equal(fnth(x, n, f, na.rm = FALSE, ties = t), fnth(x, n, f, rep(6, 100), na.rm = FALSE, ties = t)) expect_equal(fnth(xNA, n, f, na.rm = FALSE, ties = t), fnth(xNA, n, f, rep(52,100), na.rm = FALSE, ties = t)) expect_equal(fnth(xNA, n, f, ties = t), fnth(xNA, n, f, rep(5997456,100), ties = t)) expect_equal(fnth(m, n, g, ties = t), fnth(m, n, g, rep(546,32), ties = t)) expect_equal(fnth(m, n, g, na.rm = FALSE, ties = t), fnth(m, n, g, rep(1,32), na.rm = FALSE, ties = t)) expect_equal(fnth(mNA, n, g, na.rm = FALSE, ties = t), fnth(mNA, n, g, rep(5,32), na.rm = FALSE, ties = t)) expect_equal(fnth(mNA, n, g, ties = t), fnth(mNA, n, g, rep(1,32), ties = t)) expect_equal(fnth(mtcars, n, g, ties = t), fnth(mtcars, n, g, rep(53,32), ties = t)) expect_equal(fnth(mtcars, n, g, na.rm = FALSE, ties = t), fnth(mtcars, n, g, rep(546,32), na.rm = FALSE, ties = t)) expect_equal(fnth(mtcNA, n, g, na.rm = FALSE, ties = t), fnth(mtcNA, n, g, rep(1,32), na.rm = FALSE, ties = t)) expect_equal(fnth(mtcNA, n, g, ties = t), fnth(mtcNA, n, g, rep(999,32), ties = t)) #} } }) test_that("fnth with weights performs like wnth (defined above)", { for(t in c("mean","min","max")) { # print(t) for(i in 1:3) { n = round(runif(1, min = 1, max = 999) / 1000, 3) # complete weights expect_equal(fnth(NA, n, w = 1, ties = t), wnth(NA_real_, n, 1, ties = t)) expect_equal(fnth(NA, n, w = 1, na.rm = FALSE, ties = t), wnth(NA_real_, n, 1, ties = t)) expect_equal(fnth(1, n, w = 1, ties = t), wnth(1, n, w = 1, ties = t)) expect_equal(fnth(1:3, n, w = 1:3, ties = t), wnth(1:3, n, 1:3, ties = t)) expect_equal(fnth(-1:1, n, w = 1:3, ties = t), wnth(-1:1, n, 1:3, ties = t)) expect_equal(fnth(1, n, w = 1, na.rm = FALSE, ties = t), wnth(1, n, 1, ties = t)) expect_equal(fnth(1:3, n, w = c(0.99,3454,1.111), na.rm = FALSE, ties = t), wnth(1:3, n, c(0.99,3454,1.111), ties = t)) expect_equal(fnth(-1:1, n, w = 1:3, na.rm = FALSE, ties = t), wnth(-1:1, n, 1:3, ties = t)) expect_equal(fnth(x, n, w = w, ties = t), wnth(x, n, w, ties = t)) expect_equal(fnth(x, n, w = w, na.rm = FALSE, ties = t), wnth(x, n, w, ties = t)) expect_equal(fnth(xNA, n, w = w, na.rm = FALSE, ties = t), wnth(xNA, n, w, ties = t)) expect_equal(fnth(xNA, n, w = w, ties = t), wnth(xNA, n, w, na.rm = TRUE, ties = t)) expect_equal(fnth(mtcars, n, w = wdat, ties = t), fnth(m, n, w = wdat, ties = t)) expect_equal(fnth(m, n, w = wdat, ties = t), dapply(m, wnth, n, wdat, na.rm = TRUE, ties = t)) expect_equal(fnth(m, n, w = wdat, na.rm = FALSE, ties = t), dapply(m, wnth, n, wdat, ties = t)) expect_equal(fnth(mNA, n, w = wdat, na.rm = FALSE, ties = t), dapply(mNA, wnth, n, wdat, ties = t)) expect_equal(fnth(mNA, n, w = wdat, ties = t), dapply(mNA, wnth, n, wdat, na.rm = TRUE, ties = t)) expect_equal(fnth(mtcars, n, w = wdat, ties = t), dapply(mtcars, wnth, n, wdat, na.rm = TRUE, ties = t)) expect_equal(fnth(mtcars, n, w = wdat, na.rm = FALSE, ties = t), dapply(mtcars, wnth, n, wdat, ties = t)) expect_equal(fnth(mtcNA, n, w = wdat, na.rm = FALSE, ties = t), dapply(mtcNA, wnth, n, wdat, ties = t)) expect_equal(fnth(mtcNA, n, w = wdat, ties = t), dapply(mtcNA, wnth, n, wdat, na.rm = TRUE, ties = t)) expect_equal(fnth(x, n, f, w, ties = t), wBY(x, f, wnth, n = n, w = w, ties = t)) expect_equal(fnth(x, n, f, w, na.rm = FALSE, ties = t), wBY(x, f, wnth, n = n, w = w, ties = t)) expect_equal(fnth(xNA, n, f, w, na.rm = FALSE, ties = t), wBY(xNA, f, wnth, n = n, w = w, ties = t)) expect_equal(fnth(xNA, n, f, w, ties = t), wBY(xNA, f, wnth, n = n, w = w, na.rm = TRUE, ties = t)) expect_equal(fnth(m, n, g, wdat, ties = t), wBY(m, gf, wnth, n = n, w = wdat, ties = t)) expect_equal(fnth(m, n, g, wdat, na.rm = FALSE, ties = t), wBY(m, gf, wnth, n = n, w = wdat, ties = t)) expect_equal(fnth(mNA, n, g, wdat, na.rm = FALSE, ties = t), wBY(mNA, gf, wnth, n = n, w = wdat, ties = t)) expect_equal(fnth(mNA, n, g, wdat, ties = t), wBY(mNA, gf, wnth, n = n, w = wdat, na.rm = TRUE, ties = t)) expect_equal(fnth(mtcars, n, g, wdat, ties = t), wBY(mtcars, gf, wnth, n = n, w = wdat, ties = t)) expect_equal(fnth(mtcars, n, g, wdat, na.rm = FALSE, ties = t), wBY(mtcars, gf, wnth, n = n, w = wdat, ties = t)) expect_equal(fnth(mtcNA, n, g, wdat, na.rm = FALSE, ties = t), wBY(mtcNA, gf, wnth, n = n, w = wdat, ties = t)) expect_equal(fnth(mtcNA, n, g, wdat, ties = t), wBY(mtcNA, gf, wnth, w = wdat, n = n, na.rm = TRUE, ties = t)) # missing weights: Only supported if x is also missing... expect_equal(fnth(NA, n, w = NA, ties = t), wnth(NA_real_, n, NA_real_, ties = t)) expect_equal(fnth(NA, n, w = NA, na.rm = FALSE, ties = t), wnth(NA_real_, n, NA_real_, ties = t)) expect_equal(fnth(xNA, n, w = wNA, na.rm = FALSE, ties = t), wnth(xNA, n, wNA, ties = t)) expect_equal(fnth(xNA, n, w = wNA, ties = t), wnth(xNA, n, wNA, na.rm = TRUE, ties = t)) expect_equal(fnth(xNA, n, f, wNA, na.rm = FALSE, ties = t), wBY(xNA, f, wnth, n = n, w = w, ties = t)) expect_equal(fnth(xNA, n, f, wNA, ties = t), wBY(xNA, f, wnth, n = n, w = w, na.rm = TRUE, ties = t)) } } }) test_that("fnth properly deals with missing data", { expect_equal(fnth(NA), NA_real_) expect_equal(fnth(NA, na.rm = FALSE), NA_real_) expect_equal(fnth(rep(NA, 2), w = 1:2), NA_real_) expect_equal(fnth(rep(NA, 2), w = 1:2), NA_real_) expect_equal(fnth(NA, w = 1), NA_real_) expect_equal(fnth(NA, w = 1, na.rm = FALSE), NA_real_) expect_equal(fnth(1), 1) expect_equal(fnth(1, na.rm = FALSE), 1) expect_error(fnth(1:2, w = rep(NA, 2))) expect_error(fnth(1:2, w = c(1, NA))) expect_error(fnth(1:2, w = c(NA, 1))) }) collapse/tests/testthat/test-quick-conversion.R0000644000176200001440000001225714065423017021466 0ustar liggesuserscontext("quick-conversion") # rm(list = ls()) set.seed(101) x <- rnorm(10) xNA <- x xNA[c(3,10)] <- NA f <- sample.int(3, 10, TRUE) fNA <- f fNA[c(3,10)] <- NA l1 <- replicate(10, rnorm(10), simplify = FALSE) l2 <- as.list(mtcars) m <- as.matrix(mtcars) m2 <- replicate(10, rnorm(10)) # Test this (plain matrix) # X = sweep(d, 2L, colMeans(qM(d), na.rm = TRUE), "replace_fill") setdfdt <- function(x) { attr(x, "row.names") <- .set_row_names(length(x[[1L]])) class(x) <- c("data.table","data.frame") alc(x) } test_that("conversions to factor run smoothly", { expect_identical(ordered(as.factor(x)), qF(x, ordered = TRUE)) expect_identical(ordered(as.factor(f)), qF(f, ordered = TRUE)) expect_identical(as.integer(as.factor(xNA)), as.integer(qF(xNA, ordered = TRUE))) expect_identical(as.integer(as.factor(fNA)), as.integer(qF(fNA, ordered = TRUE))) expect_identical(as.integer(as.factor(x)), as.integer(qG(x, ordered = TRUE))) expect_identical(as.integer(as.factor(f)), as.integer(qF(f, ordered = TRUE))) expect_identical(as.integer(as.factor(xNA)), as.integer(qG(xNA, ordered = TRUE))) expect_identical(as.integer(qF(fNA, ordered = TRUE)), as.integer(qG(fNA, ordered = TRUE))) }) test_that("conversions to matrix run smoothly", { expect_identical(do.call(cbind, l1), qM(l1)) expect_identical(do.call(cbind, l2), qM(l2)) expect_identical(as.matrix(mtcars), qM(mtcars)) expect_identical(`dimnames<-`(as.matrix(x), list(NULL, "x")), qM(x)) expect_identical(qM(m), m) expect_identical(qM(m2), m2) }) test_that("conversions to data.frame / data.table run smoothly", { expect_identical(setNames(as.data.frame(l1), paste0("V",1:10)), qDF(l1)) expect_identical(as.data.frame(l2), qDF(l2)) expect_identical(as.data.frame(m), qDF(m)) expect_identical(as.data.frame(m2), qDF(m2)) expect_identical(as.data.frame(x), qDF(x)) expect_identical(qDF(mtcars), mtcars) expect_identical(setdfdt(setNames(as.data.frame(l1), paste0("V",1:10))), qDT(l1)) expect_identical(setdfdt(as.data.frame(l2)), qDT(l2)) expect_identical(setdfdt(as.data.frame(m)), qDT(m)) expect_identical(setdfdt(as.data.frame(m2)), qDT(m2)) expect_identical(setdfdt(as.data.frame(x)), qDT(x)) expect_identical(qDT(mtcars), setdfdt(mtcars)) }) test_that("double-conversions are ok", { expect_identical(qDF(qDT(mtcars)), setRownames(mtcars)) expect_identical(qM(qDT(m)), setRownames(m, NULL)) expect_identical(qM(qDF(m)), m) }) test_that("mrtl and mctl work well", { expect_equal(mctl(m), lapply(seq_col(m), function(i) unattrib(m[, i]))) expect_equal(mctl(m, TRUE), setNames(lapply(seq_col(m), function(i) unattrib(m[, i])), colnames(m))) expect_equal(mctl(m, TRUE, "data.frame"), mtcars) expect_equal(mctl(m, TRUE, "data.table"), qDT(mtcars)) expect_equal(mctl(m, FALSE, "data.frame"), setRownames(setNames(mtcars, paste0("V", seq_col(mtcars))))) expect_equal(mctl(m, FALSE, "data.table"), qDT(setNames(mtcars, paste0("V", seq_col(mtcars))))) expect_equal(mrtl(m), lapply(seq_row(m), function(i) unattrib(m[i, ]))) expect_equal(mrtl(m, TRUE), setNames(lapply(seq_row(m), function(i) unattrib(m[i, ])), rownames(m))) expect_equal(mrtl(m, TRUE, "data.frame"), as.data.frame(t(m))) expect_equal(mrtl(m, TRUE, "data.table"), qDT(as.data.frame(t(m)))) expect_equal(mrtl(m, FALSE, "data.frame"), setRownames(setNames(as.data.frame(t(m)), paste0("V", seq_row(mtcars))))) expect_equal(mrtl(m, FALSE, "data.table"), qDT(setNames(as.data.frame(t(m)), paste0("V", seq_row(mtcars))))) }) test_that("qM keep.attr and class options work as intended", { expect_identical(qM(m), m) expect_identical(qM(m, keep.attr = TRUE), m) expect_identical(qM(m, keep.attr = TRUE, class = "matrix"), `oldClass<-`(m, "matrix")) expect_identical(qM(m, class = "matrix"), `oldClass<-`(m, "matrix")) expect_identical(qM(mtcars), m) expect_identical(qM(mtcars, keep.attr = TRUE), m) expect_identical(qM(mtcars, keep.attr = TRUE, class = "matrix"), `oldClass<-`(m, "matrix")) expect_identical(qM(mtcars, class = "matrix"), `oldClass<-`(m, "matrix")) gmtcars <- `attr<-`(fgroup_by(mtcars, cyl, vs, am), "was.tibble", NULL) expect_identical(qM(gmtcars), m) expect_identical(qM(gmtcars, keep.attr = TRUE), `attr<-`(m, "groups", attr(gmtcars, "groups"))) expect_identical(qM(gmtcars, keep.attr = TRUE, class = "matrix"), `oldClass<-`(`attr<-`(m, "groups", attr(gmtcars, "groups")), "matrix")) expect_identical(qM(gmtcars, class = "matrix"), `oldClass<-`(m, "matrix")) expect_identical(qM(EuStockMarkets, keep.attr = TRUE), EuStockMarkets) expect_identical(qM(EuStockMarkets), unclass(`attr<-`(EuStockMarkets, "tsp", NULL))) expect_false(identical(qM(EuStockMarkets), EuStockMarkets)) expect_false(identical(qM(EuStockMarkets, keep.attr = TRUE, class = "matrix"), EuStockMarkets)) tsl <- list(a = AirPassengers, b = AirPassengers) expect_identical(qM(tsl, keep.attr = TRUE), do.call(cbind, tsl)) expect_identical(qM(tsl), unclass(`attr<-`(do.call(cbind, tsl), "tsp", NULL))) expect_false(identical(qM(tsl), do.call(cbind, tsl))) expect_false(identical(qM(tsl, keep.attr = TRUE, class = "matrix"), do.call(cbind, tsl))) }) collapse/tests/testthat/test-flag-L-F.R0000644000176200001440000005370214166277021017460 0ustar liggesuserscontext("flag / L / F") # rm(list = ls()) # TODO: test computations on irregular time series and panels set.seed(101) x <- abs(10*rnorm(100)) xNA <- x xNA[sample.int(100, 20)] <- NA f <- as.factor(rep(1:10, each = 10)) t <- as.factor(rep(1:10, 10)) data <- setRownames(wlddev[wlddev$iso3c %in% c("BLZ","IND","USA","SRB","GRL"), ]) g <- GRP(droplevels(data$iso3c)) td <- as.factor(data$year) dataNA <- na_insert(data) m <- qM(data) suppressWarnings(storage.mode(m) <- "numeric") mNAc <- qM(dataNA) mNA <- mNAc suppressWarnings(storage.mode(mNA) <- "numeric") # Creatung unordered data: o = order(rnorm(100)) xuo = x[o] xNAuo = xNA[o] fuo = f[o] tuo = t[o] t2uo = seq_len(100)[o] o = order(o) od = order(rnorm(length(td))) muo = m[od, ] datauo = data[od, ] guo = as_factor_GRP(g)[od] tduo = td[od] t2duo = seq_along(od)[od] od = order(od) baselag <- function(x, n = 1) c(rep(NA_real_, n), x[1:(length(x)-n)]) baselead <- function(x, n = 1) c(rep(NA_real_, n), x[1:(length(x)-n)]) # flag test_that("flag performs like baselag", { expect_equal(flag(1:10), baselag(1:10)) expect_equal(flag(1:10, 2), baselag(1:10, 2)) expect_equal(flag(-1:1), baselag(-1:1)) expect_equal(flag(x), baselag(x)) expect_equal(flag(x, 2), baselag(x, 2)) expect_equal(flag(xNA), baselag(xNA)) expect_equal(flag(xNA, 2), baselag(xNA, 2)) expect_equal(flag(m, stubs = FALSE), dapply(m, baselag)) expect_equal(flag(m, 2, stubs = FALSE), dapply(m, baselag, 2)) expect_equal(flag(mNA, stubs = FALSE), dapply(mNA, baselag)) expect_equal(flag(mNA, 2, stubs = FALSE), dapply(mNA, baselag, 2)) expect_equal(flag(num_vars(data), stubs = FALSE), dapply(num_vars(data), baselag)) expect_equal(flag(num_vars(data), 2, stubs = FALSE), dapply(num_vars(data), baselag, 2)) expect_equal(flag(num_vars(dataNA), stubs = FALSE), dapply(num_vars(dataNA), baselag)) expect_equal(flag(num_vars(dataNA), 2, stubs = FALSE), dapply(num_vars(dataNA), baselag, 2)) expect_equal(flag(x, 1, f), BY(x, f, baselag, use.g.names = FALSE)) expect_equal(flag(x, 2, f), BY(x, f, baselag, 2, use.g.names = FALSE)) expect_equal(flag(xNA, 1, f), BY(xNA, f, baselag, use.g.names = FALSE)) expect_equal(flag(xNA, 2, f), BY(xNA, f, baselag, 2, use.g.names = FALSE)) expect_equal(flag(m, 1, g, stubs = FALSE), BY(m, g, baselag, use.g.names = FALSE)) expect_equal(flag(m, 2, g, stubs = FALSE), BY(m, g, baselag, 2, use.g.names = FALSE)) expect_equal(flag(mNA, 1, g, stubs = FALSE), BY(mNA, g, baselag, use.g.names = FALSE)) expect_equal(flag(mNA, 2, g, stubs = FALSE), BY(mNA, g, baselag, 2, use.g.names = FALSE)) expect_equal(flag(num_vars(data), 1, g, stubs = FALSE), BY(num_vars(data), g, baselag, use.g.names = FALSE)) expect_equal(flag(num_vars(data), 2, g, stubs = FALSE), BY(num_vars(data), g, baselag, 2, use.g.names = FALSE)) expect_equal(flag(num_vars(dataNA), 1, g, stubs = FALSE), BY(num_vars(dataNA), g, baselag, use.g.names = FALSE)) expect_equal(flag(num_vars(dataNA), 2, g, stubs = FALSE), BY(num_vars(dataNA), g, baselag, 2, use.g.names = FALSE)) # Adding time-variable: Computing fully identified panel-lags !! expect_equal(flag(x, 1, f, t), BY(x, f, baselag, use.g.names = FALSE)) expect_equal(flag(x, 2, f, t), BY(x, f, baselag, 2, use.g.names = FALSE)) expect_equal(flag(xNA, 1, f, t), BY(xNA, f, baselag, use.g.names = FALSE)) expect_equal(flag(xNA, 2, f, t), BY(xNA, f, baselag, 2, use.g.names = FALSE)) expect_equal(flag(m, 1, g, td, stubs = FALSE), BY(m, g, baselag, use.g.names = FALSE)) expect_equal(flag(m, 2, g, td, stubs = FALSE), BY(m, g, baselag, 2, use.g.names = FALSE)) expect_equal(flag(mNA, 1, g, td, stubs = FALSE), BY(mNA, g, baselag, use.g.names = FALSE)) expect_equal(flag(mNA, 2, g, td, stubs = FALSE), BY(mNA, g, baselag, 2, use.g.names = FALSE)) expect_equal(flag(num_vars(data), 1, g, td, stubs = FALSE), BY(num_vars(data), g, baselag, use.g.names = FALSE)) expect_equal(flag(num_vars(data), 2, g, td, stubs = FALSE), BY(num_vars(data), g, baselag, 2, use.g.names = FALSE)) expect_equal(flag(num_vars(dataNA), 1, g, td, stubs = FALSE), BY(num_vars(dataNA), g, baselag, use.g.names = FALSE)) expect_equal(flag(num_vars(dataNA), 2, g, td, stubs = FALSE), BY(num_vars(dataNA), g, baselag, 2, use.g.names = FALSE)) }) test_that("flag performs (panel-) vector lags and leads without errors", { expect_visible(flag(1:10, -2:2)) expect_visible(flag(1:10, 1:2)) expect_visible(flag(1:10, -1:-2)) expect_visible(flag(1:10, 0)) expect_visible(flag(xNA, -2:2)) expect_visible(flag(xNA, 1:2)) expect_visible(flag(xNA, -1:-2)) expect_visible(flag(xNA, 0)) expect_visible(flag(xNA, -2:2, f)) expect_visible(flag(xNA, 1:2, f)) expect_visible(flag(xNA, -1:-2, f)) expect_visible(flag(xNA, 0, f)) expect_visible(flag(xNA, -2:2, f, t)) expect_visible(flag(xNA, 1:2, f, t)) expect_visible(flag(xNA, -1:-2, f, t)) expect_visible(flag(xNA, 0, f, t)) }) test_that("flag performs (panel-) matrix lags and leads without errors", { expect_visible(flag(m, -2:2)) expect_visible(flag(m, 1:2)) expect_visible(flag(m, -1:-2)) expect_visible(flag(m, 0)) expect_visible(flag(m, -2:2, g)) expect_visible(flag(m, 1:2, g)) expect_visible(flag(m, -1:-2, g)) expect_visible(flag(m, 0, g)) expect_visible(flag(m, -2:2, g, td)) expect_visible(flag(m, 1:2, g, td)) expect_visible(flag(m, -1:-2, g, td)) expect_visible(flag(m, 0, g, td)) }) test_that("flag performs (panel-) data.frame lags and leads without errors", { expect_visible(flag(data, -2:2)) expect_visible(flag(data, 1:2)) expect_visible(flag(data, -1:-2)) expect_visible(flag(data, 0)) expect_visible(flag(data, -2:2, g)) expect_visible(flag(data, 1:2, g)) expect_visible(flag(data, -1:-2, g)) expect_visible(flag(data, 0, g)) expect_visible(flag(data, -2:2, g, td)) expect_visible(flag(data, 1:2, g, td)) expect_visible(flag(data, -1:-2, g, td)) expect_visible(flag(data, 0, g, td)) }) test_that("flag correctly handles unordered time-series and panel-series computations", { expect_equal(flag(x, -2:2, t = 1:100), flag(x, -2:2)) expect_equal(flag(xNA, -2:2, t = 1:100), flag(xNA, -2:2)) expect_equal(flag(m, -2:2, t = seq_along(td)), flag(m, -2:2)) expect_equal(flag(data, -2:2, t = seq_along(td)), flag(data, -2:2)) expect_equal(flag(xuo, -2:2, t = t2uo)[o,], unclass(flag(x, -2:2))) expect_equal(flag(xNAuo, -2:2, t = t2uo)[o,], unclass(flag(xNA, -2:2))) expect_equal(flag(muo, -2:2, t = t2duo)[od,], unclass(flag(m, -2:2))) expect_equal(flag(datauo, -2:2, t = t2duo)[od,], flag(data, -2:2)) expect_equal(flag(xuo, -2:2, fuo, tuo)[o,], unclass(flag(x, -2:2, f, t))) expect_equal(flag(xNAuo, -2:2, fuo, tuo)[o,], unclass(flag(xNA, -2:2, f, t))) expect_equal(flag(muo, -2:2, guo, tduo)[od,], unclass(flag(m, -2:2, g, td))) expect_equal(flag(datauo, -2:2, guo, tduo)[od,], flag(data, -2:2, g, td)) }) test_that("flag performs numerically stable in ordered computations", { expect_true(all_obj_equal(replicate(50, flag(x), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, flag(xNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, flag(m), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, flag(mNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, flag(data), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, flag(dataNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, flag(x, 1, f), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, flag(x, -2:2, f), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, flag(xNA, 1, f), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, flag(xNA, -2:2, f), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, flag(m, 1, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, flag(m, -2:2, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, flag(mNA, 1, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, flag(mNA, -2:2, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, flag(data, 1, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, flag(data, -2:2, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, flag(dataNA, 1, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, flag(dataNA, -2:2, g), simplify = FALSE))) }) test_that("flag performs numerically stable in unordered computations", { expect_true(all_obj_equal(replicate(50, flag(xuo, t = t2uo), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, flag(xNAuo, t = t2uo), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, flag(muo, t = t2duo), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, flag(datauo, t = t2duo), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, flag(xuo, 1, fuo, tuo), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, flag(xuo, -2:2, fuo, tuo), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, flag(muo, 1, guo, tduo), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, flag(muo, -2:2, guo, tduo), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, flag(datauo, 1, guo, tduo), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, flag(datauo, -2:2, guo, tduo), simplify = FALSE))) }) test_that("flag handles special values in the right way", { # zero expect_equal(flag(c("a","b"),0), c("a","b")) expect_equal(flag(c(NaN,NaN),0), c(NaN,NaN)) expect_equal(flag(c(Inf,Inf),0), c(Inf,Inf)) expect_equal(flag(c(FALSE,TRUE),0), c(FALSE,TRUE)) expect_equal(flag(c(TRUE,FALSE),0), c(TRUE,FALSE)) # lags expect_equal(flag(c("a","b")), c(NA,"a")) expect_equal(flag(c(1,NA)), c(NA_real_,1)) expect_equal(flag(c(NA,1)), c(NA_real_,NA_real_)) expect_equal(flag(c(NaN,1)), c(NA_real_,NaN)) expect_equal(flag(c(1,NaN)), c(NA_real_,1)) expect_equal(flag(c(Inf,1)), c(NA,Inf)) expect_equal(flag(c(1,Inf)), c(NA,1)) expect_equal(flag(c(Inf,NA)), c(NA_real_,Inf)) expect_equal(flag(c(NA,Inf)), c(NA_real_,NA_real_)) expect_equal(flag(c(Inf,-Inf)), c(NA,Inf)) expect_equal(flag(c(-Inf,Inf)), c(NA,-Inf)) expect_equal(flag(c(Inf,Inf)), c(NA,Inf)) expect_equal(flag(c(TRUE,TRUE)), c(NA,TRUE)) expect_equal(flag(c(TRUE,FALSE)), c(NA,TRUE)) expect_equal(flag(c(FALSE,TRUE)), c(NA,FALSE)) # leads expect_equal(flag(c("a","b"),-1), c("b",NA)) expect_equal(flag(c(1,NA),-1), c(NA_real_,NA_real_)) expect_equal(flag(c(NA,1),-1), c(1,NA_real_)) expect_equal(flag(c(NaN,1),-1), c(1,NA_real_)) expect_equal(flag(c(1,NaN),-1), c(NaN,NA_real_)) expect_equal(flag(c(Inf,1),-1), c(1,NA_real_)) expect_equal(flag(c(1,Inf),-1), c(Inf,NA_real_)) expect_equal(flag(c(Inf,NA),-1), c(NA_real_,NA_real_)) expect_equal(flag(c(NA,Inf),-1), c(Inf,NA_real_)) expect_equal(flag(c(Inf,-Inf),-1), c(-Inf,NA_real_)) expect_equal(flag(c(-Inf,Inf),-1), c(Inf,NA_real_)) expect_equal(flag(c(Inf,Inf),-1), c(Inf,NA_real_)) expect_equal(flag(c(TRUE,TRUE),-1), c(TRUE,NA)) expect_equal(flag(c(TRUE,FALSE),-1), c(FALSE,NA)) expect_equal(flag(c(FALSE,TRUE),-1), c(TRUE,NA)) }) test_that("flag produces errors for wrong input", { # type: normally guaranteed by C++ expect_visible(flag(mNAc)) expect_visible(flag(wlddev)) expect_error(flag(mNAc, f)) expect_error(flag(x, "1")) # if n exceeds length(x), should give error expect_error(flag(x,101)) expect_error(flag(x,-101)) # if n exceeds average group size, should give error # expect_warning(flag(x,11,f)) # Some fail on i386 ?? # expect_warning(flag(x,11,f,t)) # expect_warning(flag(x,-11,f)) # expect_warning(flag(x,-11,f,t)) # passing repeated n-values or non-positive or non-consecutive diff values should give error expect_error(flag(x,c(1,1))) expect_error(flag(x,c(-1,-1))) expect_visible(flag(x,2:1)) expect_visible(flag(x,0)) expect_error(flag(x,f)) # common source of error probably is passing the factor in a wrong slot expect_error(flag(x,c(1,1),f)) expect_error(flag(x,c(1,1),f,t)) expect_visible(flag(x,2:1,f)) expect_visible(flag(x,2:1,f,t)) expect_visible(flag(x,0,f)) expect_visible(flag(x,0,f,t)) expect_error(flag(x,1,1)) # wrong inputs: passing a non-existent difference argument.. expect_error(flag(x,1,1)) expect_error(flag(x,1,1,f)) expect_error(flag(x,1,1,f,t)) expect_error(flag(x,1,-1,f)) expect_error(flag(x,1,-1,f,t)) # repeated values or gaps in time-variable should give error expect_error(flag(1:3, t = c(1,1,2))) expect_error(flag(1:3, t = c(1,2,2))) expect_error(flag(1:3, t = c(1,2,1))) expect_error(flag(1:10, g = c(1,1,1,2,2,2,3,3,3,3), t = c(1,1,2,1:3,1:4))) # This is the only possible statement which does not throw a reteated timevar error because the first C++ index is 0, and omap is also initialized with 0's. expect_error(flag(1:10, g = c(1,1,1,2,2,2,3,3,3,3), t = c(1,1,1,1:3,1:4))) expect_error(flag(1:10, g = c(1,1,1,2,2,2,3,3,3,3), t = c(1:3,1:3,1,1,3,4))) expect_error(flag(1:10, g = c(1,1,1,2,2,2,3,3,3,3), t = c(1,2,2,1:3,1:4))) expect_visible(flag(1:10, g = c(1,1,1,2,2,2,3,3,3,3), t = c(1,2,4,1:3,1:4))) expect_error(flag(1:10, g = c(1,2,1,2,2,2,3,3,3,3), t = c(1:3,1:3,1:4))) expect_visible(flag(1:10, g = c(1,1,1,2,2,2,3,3,4,3), t = c(1:3,1:3,1:4))) # The usual stuff: Wrongly sized grouping vectors or time-variables expect_error(flag(1:3, t = 1:2)) expect_error(flag(1:3, t = 1:4)) expect_error(flag(1:3, g = 1:2)) expect_error(flag(1:3, g = 1:4)) expect_error(flag(1:4, g = c(1,1,2,2), t = c(1,2,1))) expect_error(flag(1:4, g = c(1,2,2), t = c(1,2,1,2))) }) # L and F test_that("F performs like baselead", { expect_equal(F(1:10, -1), baselead(1:10)) expect_equal(F(1:10, -2), baselead(1:10, 2)) expect_equal(F(-1:1, -1), baselead(-1:1)) expect_equal(F(x, -1), baselead(x)) expect_equal(F(x, -2), baselead(x, 2)) expect_equal(F(xNA, -1), baselead(xNA)) expect_equal(F(xNA, -2), baselead(xNA, 2)) expect_equal(F(m, -1, stubs = FALSE), dapply(m, baselead)) expect_equal(F(m, -2, stubs = FALSE), dapply(m, baselead, 2)) expect_equal(F(mNA, -1, stubs = FALSE), dapply(mNA, baselead)) expect_equal(F(mNA, -2, stubs = FALSE), dapply(mNA, baselead, 2)) expect_equal(F(num_vars(data), -1, stubs = FALSE), dapply(num_vars(data), baselead)) expect_equal(F(num_vars(data), -2, stubs = FALSE), dapply(num_vars(data), baselead, 2)) expect_equal(F(num_vars(dataNA), -1, stubs = FALSE), dapply(num_vars(dataNA), baselead)) expect_equal(F(num_vars(dataNA), -2, stubs = FALSE), dapply(num_vars(dataNA), baselead, 2)) expect_equal(F(x, -1, f), BY(x, f, baselead, use.g.names = FALSE)) expect_equal(F(x, -2, f), BY(x, f, baselead, 2, use.g.names = FALSE)) expect_equal(F(xNA, -1, f), BY(xNA, f, baselead, use.g.names = FALSE)) expect_equal(F(xNA, -2, f), BY(xNA, f, baselead, 2, use.g.names = FALSE)) expect_equal(F(m, -1, g, stubs = FALSE), BY(m, g, baselead, use.g.names = FALSE)) expect_equal(F(m, -2, g, stubs = FALSE), BY(m, g, baselead, 2, use.g.names = FALSE)) expect_equal(F(mNA, -1, g, stubs = FALSE), BY(mNA, g, baselead, use.g.names = FALSE)) expect_equal(F(mNA, -2, g, stubs = FALSE), BY(mNA, g, baselead, 2, use.g.names = FALSE)) expect_equal(F(num_vars(data), -1, g, stubs = FALSE), BY(num_vars(data), g, baselead, use.g.names = FALSE)) expect_equal(F(num_vars(data), -2, g, stubs = FALSE), BY(num_vars(data), g, baselead, 2, use.g.names = FALSE)) expect_equal(F(num_vars(dataNA), -1, g, stubs = FALSE), BY(num_vars(dataNA), g, baselead, use.g.names = FALSE)) expect_equal(F(num_vars(dataNA), -2, g, stubs = FALSE), BY(num_vars(dataNA), g, baselead, 2, use.g.names = FALSE)) # Adding time-variable: Computing fully identified panel-lags !! expect_equal(F(x, -1, f, t), BY(x, f, baselead, use.g.names = FALSE)) expect_equal(F(x, -2, f, t), BY(x, f, baselead, 2, use.g.names = FALSE)) expect_equal(F(xNA, -1, f, t), BY(xNA, f, baselead, use.g.names = FALSE)) expect_equal(F(xNA, -2, f, t), BY(xNA, f, baselead, 2, use.g.names = FALSE)) expect_equal(F(m, -1, g, td, stubs = FALSE), BY(m, g, baselead, use.g.names = FALSE)) expect_equal(F(m, -2, g, td, stubs = FALSE), BY(m, g, baselead, 2, use.g.names = FALSE)) expect_equal(F(mNA, -1, g, td, stubs = FALSE), BY(mNA, g, baselead, use.g.names = FALSE)) expect_equal(F(mNA, -2, g, td, stubs = FALSE), BY(mNA, g, baselead, 2, use.g.names = FALSE)) expect_equal(F(num_vars(data), -1, g, td, stubs = FALSE), BY(num_vars(data), g, baselead, use.g.names = FALSE)) expect_equal(F(num_vars(data), -2, g, td, stubs = FALSE), BY(num_vars(data), g, baselead, 2, use.g.names = FALSE)) expect_equal(F(num_vars(dataNA), -1, g, td, stubs = FALSE), BY(num_vars(dataNA), g, baselead, use.g.names = FALSE)) expect_equal(F(num_vars(dataNA), -2, g, td, stubs = FALSE), BY(num_vars(dataNA), g, baselead, 2, use.g.names = FALSE)) }) test_that("L and F do the opposite of one another", { expect_equal(L(1:10, -2:2), F(1:10, 2:-2)) expect_equal(L(m, -2:2), F(m, 2:-2)) expect_equal(L(data, -2:2), F(data, 2:-2)) }) test_that("L produces errors for wrong input", { # type: normally guaranteed by C++ expect_visible(L(mNAc)) expect_visible(L(wlddev)) expect_error(L(mNAc, f)) expect_error(L(x, "1")) # if n exceeds length(x), should give error expect_error(L(x,101)) expect_error(L(x,-101)) # if n exceeds average group size, should give error # expect_warning(L(x,11,f)) -> some fail on i336 # expect_warning(L(x,11,f,t)) # expect_warning(L(x,-11,f)) # expect_warning(L(x,-11,f,t)) # passing repeated n-values or non-positive or non-consecutive diff values should give error expect_error(L(x,c(1,1))) expect_error(L(x,c(-1,-1))) expect_visible(L(x,2:1)) expect_visible(L(x,0)) expect_error(L(x,f)) # common source of error probably is passing the factor in a wrong slot expect_error(L(x,c(1,1),f)) expect_error(L(x,c(1,1),f,t)) expect_visible(L(x,2:1,f)) expect_visible(L(x,2:1,f,t)) expect_visible(L(x,0,f)) expect_visible(L(x,0,f,t)) expect_error(L(x,1,1)) # wrong inputs: passing a non-existent difference argument.. expect_error(L(x,1,1)) expect_error(L(x,1,1,f)) expect_error(L(x,1,1,f,t)) expect_error(L(x,1,-1,f)) expect_error(L(x,1,-1,f,t)) # repeated values or gaps in time-variable should give error expect_error(L(1:3, t = c(1,1,2))) expect_error(L(1:3, t = c(1,2,2))) expect_error(L(1:3, t = c(1,2,1))) expect_error(L(1:10, g = c(1,1,1,2,2,2,3,3,3,3), t = c(1,1,2,1:3,1:4))) # This is the only possible statement which does not throw a reteated timevar error because the first C++ index is 0, and omap is also initialized with 0's. expect_error(L(1:10, g = c(1,1,1,2,2,2,3,3,3,3), t = c(1,1,1,1:3,1:4))) expect_error(L(1:10, g = c(1,1,1,2,2,2,3,3,3,3), t = c(1:3,1:3,1,1,3,4))) expect_error(L(1:10, g = c(1,1,1,2,2,2,3,3,3,3), t = c(1,2,2,1:3,1:4))) expect_visible(L(1:10, g = c(1,1,1,2,2,2,3,3,3,3), t = c(1,2,4,1:3,1:4))) expect_error(L(1:10, g = c(1,2,1,2,2,2,3,3,3,3), t = c(1:3,1:3,1:4))) expect_visible(L(1:10, g = c(1,1,1,2,2,2,3,3,4,3), t = c(1:3,1:3,1:4))) # The usual stuff: Wrongly sized grouping vectors or time-variables expect_error(L(1:3, t = 1:2)) expect_error(L(1:3, t = 1:4)) expect_error(L(1:3, g = 1:2)) expect_error(L(1:3, g = 1:4)) expect_error(L(1:4, g = c(1,1,2,2), t = c(1,2,1))) expect_error(L(1:4, g = c(1,2,2), t = c(1,2,1,2))) }) test_that("L.data.frame method is foolproof", { expect_visible(L(wlddev)) expect_visible(L(wlddev, by = wlddev$iso3c)) expect_error(L(wlddev, t = ~year)) expect_visible(L(wlddev, 1, wlddev$iso3c)) expect_visible(L(wlddev, -2:2, ~iso3c)) expect_visible(L(wlddev, 1, ~iso3c + region)) expect_visible(L(wlddev, -2:2, wlddev$iso3c, wlddev$year)) expect_visible(L(wlddev, -2:2, ~iso3c, ~year)) expect_visible(L(wlddev, cols = 9:12)) expect_visible(L(wlddev, -1:1,~iso3c, cols = 9:12)) expect_visible(L(wlddev, -1:1,wlddev$iso3c, wlddev$year, cols = 9:12)) expect_visible(L(wlddev, -1:1,~iso3c, ~year, cols = 9:12)) expect_visible(L(wlddev, cols = c("PCGDP","LIFEEX"))) expect_visible(L(wlddev, -1:1,wlddev$iso3c, cols = c("PCGDP","LIFEEX"))) expect_visible(L(wlddev, -1:1,~iso3c, cols = c("PCGDP","LIFEEX"))) expect_visible(L(wlddev, -1:1,wlddev$iso3c, wlddev$year, cols = c("PCGDP","LIFEEX"))) expect_visible(L(wlddev, -1:1,~iso3c, ~year, cols = c("PCGDP","LIFEEX"))) expect_visible(L(wlddev, cols = NULL)) expect_visible(L(wlddev, -1:1,wlddev$iso3c, cols = NULL)) expect_visible(L(wlddev, -1:1,~iso3c, cols = NULL)) expect_visible(L(wlddev, -1:1,wlddev$iso3c, wlddev$year, cols = NULL)) expect_visible(L(wlddev, -1:1,~iso3c, ~year, cols = NULL)) expect_error(L(wlddev, cols = 9:14)) expect_error(L(wlddev, -1:1,~iso3c, ~year, cols = 9:14)) expect_error(L(wlddev, cols = c("PCGDP","LIFEEX","bla"))) expect_error(L(wlddev, -1:1,~iso3c, ~year, cols = c("PCGDP","LIFEEX","bla"))) expect_warning(L(wlddev, w = 4)) expect_warning(L(wlddev, g = 4)) expect_error(L(wlddev, t = "year")) expect_error(L(wlddev, by = ~year2)) expect_error(L(wlddev, t = ~year + region)) expect_error(L(wlddev, data)) expect_error(L(wlddev, -1:1,"iso3c")) expect_error(L(wlddev, -1:1,~iso3c2)) expect_error(L(wlddev, -1:1,~iso3c + bla)) expect_error(L(wlddev, -1:1,t = rnorm(30))) expect_error(L(wlddev, -1:1,by = rnorm(30))) expect_error(L(wlddev, -1:1,mtcars$mpg, 1:29)) expect_error(L(wlddev, -1:1,mtcars$mpg, mtcars$cyl)) # this gives a repeated values error first because length(g) == length(t) expect_error(L(wlddev,-1:1, ~iso3c2, ~year2)) expect_error(L(wlddev, cols = ~bla)) expect_visible(L(wlddev, -1:1,wlddev$iso3c, ~year, cols = 9:12)) expect_visible(L(wlddev, -1:1,~iso3c, wlddev$year, cols = 9:12)) expect_error(L(wlddev, -1:1,wlddev$iso3c, ~year + bla, cols = 9:12)) expect_error(L(wlddev, -1:1,~iso3c3, ~year, cols = 9:12)) expect_error(L(wlddev, cols = c("PC3GDP","LIFEEX"))) }) collapse/tests/testthat/test-fmean.R0000644000176200001440000005650414167353106017264 0ustar liggesuserscontext("fmean") bmean <- base::mean bsum <- base::sum # rm(list = ls()) set.seed(101) x <- rnorm(100) w <- abs(100*rnorm(100)) wdat <- abs(100*rnorm(32)) xNA <- x wNA <- w wdatNA <- wdat xNA[sample.int(100,20)] <- NA wNA[sample.int(100,20)] <- NA wdatNA[sample.int(32, 5)] <- NA f <- as.factor(sample.int(10, 100, TRUE)) g <- GRP(mtcars, ~ cyl + vs + am) gf <- as_factor_GRP(g) mtcNA <- na_insert(mtcars) mtcNA[27,1] <- NA # single group NA !! m <- as.matrix(mtcars) mNA <- as.matrix(mtcNA) mNAc <- mNA storage.mode(mNAc) <- "character" wmean <- function(x, w, na.rm = FALSE) { if(na.rm) { cc <- complete.cases(x, w) x <- x[cc] w <- w[cc] } bsum(x*w)/bsum(w) } wBY <- function(x, f, FUN, w, ...) { if(is.atomic(x) && !is.array(x)) return(mapply(FUN, split(x, f), split(w, f), ...)) wspl <- split(w, f) if(is.atomic(x)) return(dapply(x, function(xi) mapply(FUN, split(xi, f), wspl, ...))) qDF(dapply(x, function(xi) mapply(FUN, split(xi, f), wspl, ...), return = "matrix")) } test_that("fmean performs like base::mean", { expect_equal(fmean(NA), bmean(NA)) expect_equal(fmean(NA, na.rm = FALSE), bmean(NA)) expect_equal(fmean(1), bmean(1, na.rm = TRUE)) expect_equal(fmean(1:3), bmean(1:3, na.rm = TRUE)) expect_equal(fmean(-1:1), bmean(-1:1, na.rm = TRUE)) expect_equal(fmean(1, na.rm = FALSE), bmean(1)) expect_equal(fmean(1:3, na.rm = FALSE), bmean(1:3)) expect_equal(fmean(-1:1, na.rm = FALSE), bmean(-1:1)) expect_equal(fmean(x), bmean(x, na.rm = TRUE)) expect_equal(fmean(x, na.rm = FALSE), bmean(x)) expect_equal(fmean(xNA, na.rm = FALSE), bmean(xNA)) expect_equal(fmean(xNA), bmean(xNA, na.rm = TRUE)) expect_equal(fmean(mtcars), fmean(m)) expect_equal(fmean(m), dapply(m, bmean, na.rm = TRUE)) expect_equal(fmean(m, na.rm = FALSE), dapply(m, bmean)) expect_equal(fmean(mNA, na.rm = FALSE), dapply(mNA, bmean)) expect_equal(fmean(mNA), dapply(mNA, bmean, na.rm = TRUE)) expect_equal(fmean(mtcars), dapply(mtcars, bmean, na.rm = TRUE)) expect_equal(fmean(mtcars, na.rm = FALSE), dapply(mtcars, bmean)) expect_equal(fmean(mtcNA, na.rm = FALSE), dapply(mtcNA, bmean)) expect_equal(fmean(mtcNA), dapply(mtcNA, bmean, na.rm = TRUE)) expect_equal(fmean(x, f), BY(x, f, bmean, na.rm = TRUE)) expect_equal(fmean(x, f, na.rm = FALSE), BY(x, f, bmean)) expect_equal(fmean(xNA, f, na.rm = FALSE), BY(xNA, f, bmean)) expect_equal(fmean(xNA, f), BY(xNA, f, bmean, na.rm = TRUE)) expect_equal(fmean(m, g), BY(m, g, bmean, na.rm = TRUE)) expect_equal(fmean(m, g, na.rm = FALSE), BY(m, g, bmean)) expect_equal(fmean(mNA, g, na.rm = FALSE), BY(mNA, g, bmean)) expect_equal(fmean(mNA, g), BY(mNA, g, bmean, na.rm = TRUE)) expect_equal(fmean(mtcars, g), BY(mtcars, g, bmean, na.rm = TRUE)) expect_equal(fmean(mtcars, g, na.rm = FALSE), BY(mtcars, g, bmean)) expect_equal(fmean(mtcNA, g, na.rm = FALSE), BY(mtcNA, g, bmean)) expect_equal(fmean(mtcNA, g), BY(mtcNA, g, bmean, na.rm = TRUE)) }) test_that("fmean with weights performs as intended (unbiased)", { expect_equal(fmean(c(2,2,4,5,5,5)), fmean(c(2,4,5), w = c(2,1,3))) expect_equal(fmean(c(2,2,4,5,5,5), na.rm = FALSE), fmean(c(2,4,5), w = c(2,1,3), na.rm = FALSE)) expect_equal(fmean(c(2.456,2.456,4.123,5.009,5.009,5.009)), fmean(c(2.456,4.123,5.009), w = c(2,1,3))) expect_equal(fmean(c(2.456,2.456,4.123,5.009,5.009,5.009), na.rm = FALSE), fmean(c(2.456,4.123,5.009), w = c(2,1,3), na.rm = FALSE)) expect_equal(fmean(c(2,2,NA,5,5,5)), fmean(c(2,NA,5), w = c(2,1,3))) expect_equal(fmean(c(2,2,NA,5,5,5), na.rm = FALSE), fmean(c(2,NA,5), w = c(2,1,3), na.rm = FALSE)) expect_equal(fmean(c(2,2,NA,5,5,5)), fmean(c(2,4,5), w = c(2,NA,3))) expect_equal(fmean(c(2,2,NA,5,5,5), na.rm = FALSE), fmean(c(2,4,5), w = c(2,NA,3), na.rm = FALSE)) expect_equal(fmean(c(NA,NA,4.123,5.009,5.009,5.009)), fmean(c(NA,4.123,5.009), w = c(2,1,3))) expect_equal(fmean(c(NA,NA,4.123,5.009,5.009,5.009), na.rm = FALSE), fmean(c(NA,4.123,5.009), w = c(2,1,3), na.rm = FALSE)) expect_equal(fmean(c(NA,NA,4.123,5.009,5.009,5.009)), fmean(c(2.456,4.123,5.009), w = c(NA,1,3))) expect_equal(fmean(c(NA,NA,4.123,5.009,5.009,5.009), na.rm = FALSE), fmean(c(2.456,4.123,5.009), w = c(NA,1,3), na.rm = FALSE)) f <- as.factor(rep(1:2, each = 6)); fs <- as.factor(rep(1:2, each = 3)) v <- c(2,2,4,5,5,5,2,2,4,5,5,5); vs <- c(2,4,5,2,4,5); w <- c(2,1,3,2,1,3) v2 <- c(2.456,2.456,4.123,5.009,5.009,5.009,2.456,2.456,4.123,5.009,5.009,5.009); v2s <- c(2.456,4.123,5.009,2.456,4.123,5.009) expect_equal(fmean(v, f), fmean(vs, fs, w)) expect_equal(fmean(v, f, na.rm = FALSE), fmean(vs, fs, w, na.rm = FALSE)) expect_equal(fmean(v2, f), fmean(v2s, fs, w)) expect_equal(fmean(v2, f, na.rm = FALSE), fmean(v2s, fs, w, na.rm = FALSE)) v[c(3,9)] <- NA; vs[c(2,5)] <- NA expect_equal(fmean(v, f), fmean(vs, fs, w)) expect_equal(fmean(v, f, na.rm = FALSE), fmean(vs, fs, w, na.rm = FALSE)) vs[c(2,5)] <- 4; w[c(2,5)] <- NA expect_equal(fmean(v, f), fmean(vs, fs, w)) expect_equal(fmean(v, f, na.rm = FALSE), fmean(vs, fs, w, na.rm = FALSE)) w[c(2,5)] <- 1; v2[c(1:2,7:8)] <- NA; v2s[c(1,4)] <- NA expect_equal(fmean(v2, f), fmean(v2s, fs, w)) expect_equal(fmean(v2, f, na.rm = FALSE), fmean(v2s, fs, w, na.rm = FALSE)) v2s[c(1,4)] <- 2.456; w[c(1,4)] <- NA expect_equal(fmean(v2, f), fmean(v2s, fs, w)) expect_equal(fmean(v2, f, na.rm = FALSE), fmean(v2s, fs, w, na.rm = FALSE)) }) test_that("fmean performs like fmean with weights all equal", { expect_equal(fmean(NA), fmean(NA, w = 0.99999999)) expect_equal(fmean(NA, na.rm = FALSE), fmean(NA, w = 2.946, na.rm = FALSE)) expect_equal(fmean(1), fmean(1, w = 3)) expect_equal(fmean(1:3), fmean(1:3, w = rep(0.999,3))) expect_equal(fmean(-1:1), fmean(-1:1, w = rep(4.2,3))) expect_equal(fmean(1, na.rm = FALSE), fmean(1, w = 5, na.rm = FALSE)) expect_equal(fmean(1:3, na.rm = FALSE), fmean(1:3, w = rep(1.44565, 3), na.rm = FALSE)) expect_equal(fmean(-1:1, na.rm = FALSE), fmean(-1:1, w = rep(1.44565, 3), na.rm = FALSE)) expect_equal(fmean(x), fmean(x, w = rep(1,100))) expect_equal(fmean(x, na.rm = FALSE), fmean(x, w = rep(1.44565, 100), na.rm = FALSE)) expect_equal(fmean(xNA, na.rm = FALSE), fmean(xNA, w = rep(4.676587, 100), na.rm = FALSE)) expect_equal(fmean(xNA), fmean(xNA, w = rep(4.676587, 100))) expect_equal(fmean(m), fmean(m, w = rep(6587.3454, 32))) expect_equal(fmean(m, na.rm = FALSE), fmean(m, w = rep(6587.3454, 32), na.rm = FALSE)) expect_equal(fmean(mNA, na.rm = FALSE), fmean(mNA, w = rep(6587.3454, 32), na.rm = FALSE)) expect_equal(fmean(mNA), fmean(mNA, w = rep(6587.3454, 32))) expect_equal(fmean(mtcars), fmean(mtcars, w = rep(6787.3454, 32))) expect_equal(fmean(mtcars, na.rm = FALSE), fmean(mtcars, w = rep(6787.3454, 32), na.rm = FALSE)) expect_equal(fmean(mtcNA, na.rm = FALSE), fmean(mtcNA, w = rep(6787.3454, 32), na.rm = FALSE)) expect_equal(fmean(mtcNA), fmean(mtcNA, w = rep(6787.3454, 32))) expect_equal(fmean(x, f), fmean(x, f, rep(546.78,100))) expect_equal(fmean(x, f, na.rm = FALSE), fmean(x, f, rep(5.88,100), na.rm = FALSE)) expect_equal(fmean(xNA, f, na.rm = FALSE), fmean(xNA, f, rep(52.7,100), na.rm = FALSE)) expect_equal(fmean(xNA, f), fmean(xNA, f, rep(5997456,100))) expect_equal(fmean(m, g), fmean(m, g, rep(546.78,32))) expect_equal(fmean(m, g, na.rm = FALSE), fmean(m, g, rep(0.0001,32), na.rm = FALSE)) expect_equal(fmean(mNA, g, na.rm = FALSE), fmean(mNA, g, rep(5.7,32), na.rm = FALSE)) expect_equal(fmean(mNA, g), fmean(mNA, g, rep(1.1,32))) expect_equal(fmean(mtcars, g), fmean(mtcars, g, rep(53,32))) expect_equal(fmean(mtcars, g, na.rm = FALSE), fmean(mtcars, g, rep(546.78,32), na.rm = FALSE)) expect_equal(fmean(mtcNA, g, na.rm = FALSE), fmean(mtcNA, g, rep(0.999999,32), na.rm = FALSE)) expect_equal(fmean(mtcNA, g), fmean(mtcNA, g, rep(999.9999,32))) }) test_that("fmean with weights performs like wmean (defined above)", { # complete weights expect_equal(fmean(NA, w = 1), wmean(NA, 1)) expect_equal(fmean(NA, w = 1, na.rm = FALSE), wmean(NA, 1)) expect_equal(fmean(1, w = 1), wmean(1, w = 1)) expect_equal(fmean(1:3, w = 1:3), wmean(1:3, 1:3)) expect_equal(fmean(-1:1, w = 1:3), wmean(-1:1, 1:3)) expect_equal(fmean(1, w = 1, na.rm = FALSE), wmean(1, 1)) expect_equal(fmean(1:3, w = c(0.99,3454,1.111), na.rm = FALSE), wmean(1:3, c(0.99,3454,1.111))) expect_equal(fmean(-1:1, w = 1:3, na.rm = FALSE), wmean(-1:1, 1:3)) expect_equal(fmean(x, w = w), wmean(x, w)) expect_equal(fmean(x, w = w, na.rm = FALSE), wmean(x, w)) expect_equal(fmean(xNA, w = w, na.rm = FALSE), wmean(xNA, w)) expect_equal(fmean(xNA, w = w), wmean(xNA, w, na.rm = TRUE)) expect_equal(fmean(mtcars, w = wdat), fmean(m, w = wdat)) expect_equal(fmean(m, w = wdat), dapply(m, wmean, wdat, na.rm = TRUE)) expect_equal(fmean(m, w = wdat, na.rm = FALSE), dapply(m, wmean, wdat)) expect_equal(fmean(mNA, w = wdat, na.rm = FALSE), dapply(mNA, wmean, wdat)) expect_equal(fmean(mNA, w = wdat), dapply(mNA, wmean, wdat, na.rm = TRUE)) expect_equal(fmean(mtcars, w = wdat), dapply(mtcars, wmean, wdat, na.rm = TRUE)) expect_equal(fmean(mtcars, w = wdat, na.rm = FALSE), dapply(mtcars, wmean, wdat)) expect_equal(fmean(mtcNA, w = wdat, na.rm = FALSE), dapply(mtcNA, wmean, wdat)) expect_equal(fmean(mtcNA, w = wdat), dapply(mtcNA, wmean, wdat, na.rm = TRUE)) expect_equal(fmean(x, f, w), wBY(x, f, wmean, w)) expect_equal(fmean(x, f, w, na.rm = FALSE), wBY(x, f, wmean, w)) expect_equal(fmean(xNA, f, w, na.rm = FALSE), wBY(xNA, f, wmean, w)) expect_equal(fmean(xNA, f, w), wBY(xNA, f, wmean, w, na.rm = TRUE)) expect_equal(fmean(m, g, wdat), wBY(m, gf, wmean, wdat)) expect_equal(fmean(m, g, wdat, na.rm = FALSE), wBY(m, gf, wmean, wdat)) expect_equal(fmean(mNA, g, wdat, na.rm = FALSE), wBY(mNA, gf, wmean, wdat)) expect_equal(fmean(mNA, g, wdat), wBY(mNA, gf, wmean, wdat, na.rm = TRUE)) expect_equal(fmean(mtcars, g, wdat), wBY(mtcars, gf, wmean, wdat)) expect_equal(fmean(mtcars, g, wdat, na.rm = FALSE), wBY(mtcars, gf, wmean, wdat)) expect_equal(fmean(mtcNA, g, wdat, na.rm = FALSE), wBY(mtcNA, gf, wmean, wdat)) expect_equal(fmean(mtcNA, g, wdat), wBY(mtcNA, gf, wmean, wdat, na.rm = TRUE)) # missing weights expect_equal(fmean(NA, w = NA), wmean(NA, NA)) expect_equal(fmean(NA, w = NA, na.rm = FALSE), wmean(NA, NA)) expect_equal(fmean(1, w = NA), wmean(1, w = NA)) expect_equal(fmean(1:3, w = c(NA,1:2)), wmean(1:3, c(NA,1:2), na.rm = TRUE)) expect_equal(fmean(-1:1, w = c(NA,1:2)), wmean(-1:1, c(NA,1:2), na.rm = TRUE)) expect_equal(fmean(1, w = NA, na.rm = FALSE), wmean(1, NA)) expect_equal(fmean(1:3, w = c(NA,1:2), na.rm = FALSE), wmean(1:3, c(NA,1:2))) expect_equal(fmean(-1:1, w = c(NA,1:2), na.rm = FALSE), wmean(-1:1, c(NA,1:2))) expect_equal(fmean(x, w = wNA), wmean(x, wNA, na.rm = TRUE)) expect_equal(fmean(x, w = wNA, na.rm = FALSE), wmean(x, wNA)) expect_equal(fmean(xNA, w = wNA, na.rm = FALSE), wmean(xNA, wNA)) expect_equal(fmean(xNA, w = wNA), wmean(xNA, wNA, na.rm = TRUE)) expect_equal(fmean(mtcars, w = wdatNA), fmean(m, w = wdatNA)) expect_equal(fmean(m, w = wdatNA), dapply(m, wmean, wdatNA, na.rm = TRUE)) expect_equal(fmean(m, w = wdatNA, na.rm = FALSE), dapply(m, wmean, wdatNA)) expect_equal(fmean(mNA, w = wdatNA, na.rm = FALSE), dapply(mNA, wmean, wdatNA)) expect_equal(fmean(mNA, w = wdatNA), dapply(mNA, wmean, wdatNA, na.rm = TRUE)) expect_equal(fmean(mtcars, w = wdatNA), dapply(mtcars, wmean, wdatNA, na.rm = TRUE)) expect_equal(fmean(mtcars, w = wdatNA, na.rm = FALSE), dapply(mtcars, wmean, wdatNA)) expect_equal(fmean(mtcNA, w = wdatNA, na.rm = FALSE), dapply(mtcNA, wmean, wdatNA)) expect_equal(fmean(mtcNA, w = wdatNA), dapply(mtcNA, wmean, wdatNA, na.rm = TRUE)) expect_equal(fmean(x, f, wNA), wBY(x, f, wmean, wNA, na.rm = TRUE)) expect_equal(fmean(x, f, wNA, na.rm = FALSE), wBY(x, f, wmean, wNA)) expect_equal(fmean(xNA, f, wNA, na.rm = FALSE), wBY(xNA, f, wmean, wNA)) expect_equal(fmean(xNA, f, wNA), wBY(xNA, f, wmean, wNA, na.rm = TRUE)) expect_equal(fmean(m, g, wdatNA), wBY(m, gf, wmean, wdatNA, na.rm = TRUE)) expect_equal(fmean(m, g, wdatNA, na.rm = FALSE), wBY(m, gf, wmean, wdatNA)) expect_equal(fmean(mNA, g, wdatNA, na.rm = FALSE), wBY(mNA, gf, wmean, wdatNA)) expect_equal(fmean(mNA, g, wdatNA), wBY(mNA, gf, wmean, wdatNA, na.rm = TRUE)) expect_equal(fmean(mtcars, g, wdatNA), wBY(mtcars, gf, wmean, wdatNA, na.rm = TRUE)) expect_equal(fmean(mtcars, g, wdatNA, na.rm = FALSE), wBY(mtcars, gf, wmean, wdatNA)) expect_equal(fmean(mtcNA, g, wdatNA, na.rm = FALSE), wBY(mtcNA, gf, wmean, wdatNA)) expect_equal(fmean(mtcNA, g, wdatNA), wBY(mtcNA, gf, wmean, wdatNA, na.rm = TRUE)) }) test_that("fmean performs numerically stable", { expect_true(all_obj_equal(replicate(50, fmean(1), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(NA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(NA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(x), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(x, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(xNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(xNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(m), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(m, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(mNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(mNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(mtcars), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(mtcars, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(mtcNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(mtcNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(x, f), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(x, f, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(xNA, f, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(xNA, f), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(m, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(m, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(mNA, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(mNA, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(mtcars, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(mtcars, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(mtcNA, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(mtcNA, g), simplify = FALSE))) }) test_that("fmean with complete weights performs numerically stable", { expect_true(all_obj_equal(replicate(50, fmean(1, w = 1), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(NA, w = 1), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(NA, w = 1, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(x, w = w), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(x, w = w, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(xNA, w = w, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(xNA, w = w), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(m, w = wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(m, w = wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(mNA, w = wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(mNA, w = wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(mtcars, w = wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(mtcars, w = wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(mtcNA, w = wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(mtcNA, w = wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(x, f, w), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(x, f, w, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(xNA, f, w, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(xNA, f, w), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(m, g, wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(m, g, wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(mNA, g, wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(mNA, g, wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(mtcars, g, wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(mtcars, g, wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(mtcNA, g, wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(mtcNA, g, wdat), simplify = FALSE))) }) test_that("fmean with missing weights performs numerically stable", { expect_true(all_obj_equal(replicate(50, fmean(1, w = NA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(NA, w = NA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(NA, w = NA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(x, w = wNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(x, w = wNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(xNA, w = wNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(xNA, w = wNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(m, w = wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(m, w = wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(mNA, w = wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(mNA, w = wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(mtcars, w = wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(mtcars, w = wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(mtcNA, w = wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(mtcNA, w = wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(x, f, wNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(x, f, wNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(xNA, f, wNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(xNA, f, wNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(m, g, wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(m, g, wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(mNA, g, wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(mNA, g, wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(mtcars, g, wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(mtcars, g, wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(mtcNA, g, wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(mtcNA, g, wdatNA), simplify = FALSE))) }) test_that("fmean handles special values in the right way", { expect_equal(fmean(NA), NA_real_) expect_equal(fmean(NaN), NaN) expect_equal(fmean(Inf), Inf) expect_equal(fmean(-Inf), -Inf) expect_equal(fmean(TRUE), 1) expect_equal(fmean(FALSE), 0) expect_equal(fmean(NA, na.rm = FALSE), NA_real_) expect_equal(fmean(NaN, na.rm = FALSE), NaN) expect_equal(fmean(Inf, na.rm = FALSE), Inf) expect_equal(fmean(-Inf, na.rm = FALSE), -Inf) expect_equal(fmean(TRUE, na.rm = FALSE), 1) expect_equal(fmean(FALSE, na.rm = FALSE), 0) expect_equal(fmean(c(1,NA)), 1) expect_equal(fmean(c(1,NaN)), 1) expect_equal(fmean(c(1,Inf)), Inf) expect_equal(fmean(c(1,-Inf)), -Inf) expect_equal(fmean(c(FALSE,TRUE)), 0.5) expect_equal(fmean(c(FALSE,FALSE)), 0) expect_equal(fmean(c(1,Inf), na.rm = FALSE), Inf) expect_equal(fmean(c(1,-Inf), na.rm = FALSE), -Inf) expect_equal(fmean(c(FALSE,TRUE), na.rm = FALSE), 0.5) expect_equal(fmean(c(FALSE,FALSE), na.rm = FALSE), 0) }) test_that("fmean with weights handles special values in the right way", { expect_equal(fmean(NA, w = 1), NA_real_) expect_equal(fmean(NaN, w = 1), NaN) expect_equal(fmean(Inf, w = 1), Inf) expect_equal(fmean(-Inf, w = 1), -Inf) expect_equal(fmean(TRUE, w = 1), 1) expect_equal(fmean(FALSE, w = 1), 0) expect_equal(fmean(NA, w = 1, na.rm = FALSE), NA_real_) expect_equal(fmean(NaN, w = 1, na.rm = FALSE), NaN) expect_equal(fmean(Inf, w = 1, na.rm = FALSE), Inf) expect_equal(fmean(-Inf, w = 1, na.rm = FALSE), -Inf) expect_equal(fmean(TRUE, w = 1, na.rm = FALSE), 1) expect_equal(fmean(FALSE, w = 1, na.rm = FALSE), 0) expect_equal(fmean(NA, w = NA), NA_real_) expect_equal(fmean(NaN, w = NA), NA_real_) expect_equal(fmean(Inf, w = NA), NA_real_) expect_equal(fmean(-Inf, w = NA), NA_real_) expect_equal(fmean(TRUE, w = NA), NA_real_) expect_equal(fmean(FALSE, w = NA), NA_real_) expect_equal(fmean(NA, w = NA, na.rm = FALSE), NA_real_) expect_equal(fmean(NaN, w = NA, na.rm = FALSE), NA_real_) expect_equal(fmean(Inf, w = NA, na.rm = FALSE), NA_real_) expect_equal(fmean(-Inf, w = NA, na.rm = FALSE), NA_real_) expect_equal(fmean(TRUE, w = NA, na.rm = FALSE), NA_real_) expect_equal(fmean(FALSE, w = NA, na.rm = FALSE), NA_real_) expect_equal(fmean(1:3, w = c(1,Inf,3)), NaN) expect_equal(fmean(1:3, w = c(1,-Inf,3)), NaN) expect_equal(fmean(1:3, w = c(1,Inf,3), na.rm = FALSE), NaN) expect_equal(fmean(1:3, w = c(1,-Inf,3), na.rm = FALSE), NaN) }) test_that("fmean produces errors for wrong input", { expect_error(fmean("a")) expect_error(fmean(NA_character_)) expect_error(fmean(mNAc)) expect_error(fmean(mNAc, f)) expect_error(fmean(1:2,1:3)) expect_error(fmean(m,1:31)) expect_error(fmean(mtcars,1:31)) expect_error(fmean(mtcars, w = 1:31)) expect_error(fmean("a", w = 1)) expect_error(fmean(1:2, w = 1:3)) expect_error(fmean(NA_character_, w = 1)) expect_error(fmean(mNAc, w = wdat)) expect_error(fmean(mNAc, f, wdat)) expect_error(fmean(mNA, w = 1:33)) expect_error(fmean(1:2,1:2, 1:3)) expect_error(fmean(m,1:32,1:20)) expect_error(fmean(mtcars,1:32,1:10)) expect_error(fmean(1:2, w = c("a","b"))) expect_error(fmean(wlddev)) expect_error(fmean(wlddev, w = wlddev$year)) expect_error(fmean(wlddev, wlddev$iso3c)) expect_error(fmean(wlddev, wlddev$iso3c, wlddev$year)) }) collapse/tests/testthat/test-miscellaneous-issues.R0000644000176200001440000003173314201327077022344 0ustar liggesuserscontext("miscellaneous issues") # rm(list = ls()) options(warn = -1) if(identical(Sys.getenv("NCRAN"), "TRUE")) { test_that("Using a factor with unused levels does not pose a problem to flag, fdiff or fgrowth (#25)", { wlddev2 <- subset(wlddev, iso3c %in% c("ALB", "AFG", "DZA")) wlddev3 <- droplevels(wlddev2) expect_identical(L(wlddev3, 1, LIFEEX~iso3c, ~year), L(wlddev3, 1, ~iso3c, ~year, cols="LIFEEX")) expect_identical(L(wlddev3, -1:1, LIFEEX~iso3c, ~year), L(wlddev3, -1:1, ~iso3c, ~year, cols="LIFEEX")) expect_identical(droplevels(L(wlddev2, 1, ~iso3c, ~year, cols="LIFEEX")), L(wlddev3, 1, ~iso3c, ~year, cols="LIFEEX")) expect_identical(droplevels(L(wlddev2, -1:1, ~iso3c, ~year, cols="LIFEEX")), L(wlddev3, -1:1, ~iso3c, ~year, cols="LIFEEX")) expect_identical(droplevels(D(wlddev2, 1, 1, ~iso3c, ~year, cols="LIFEEX")), D(wlddev3, 1, 1, ~iso3c, ~year, cols="LIFEEX")) expect_identical(droplevels(D(wlddev2, -1:1, 1:2, ~iso3c, ~year, cols="LIFEEX")), D(wlddev3, -1:1, 1:2, ~iso3c, ~year, cols="LIFEEX")) expect_identical(droplevels(Dlog(wlddev2, 1, 1, ~iso3c, ~year, cols="LIFEEX")), Dlog(wlddev3, 1, 1, ~iso3c, ~year, cols="LIFEEX")) expect_identical(droplevels(Dlog(wlddev2, -1:1, 1:2, ~iso3c, ~year, cols="LIFEEX")), Dlog(wlddev3, -1:1, 1:2, ~iso3c, ~year, cols="LIFEEX")) expect_identical(droplevels(D(wlddev2, 1, 1, ~iso3c, ~year, cols="LIFEEX", rho = 0.95)), D(wlddev3, 1, 1, ~iso3c, ~year, cols="LIFEEX", rho = 0.95)) expect_identical(droplevels(D(wlddev2, -1:1, 1:2, ~iso3c, ~year, cols="LIFEEX", rho = 0.95)), D(wlddev3, -1:1, 1:2, ~iso3c, ~year, cols="LIFEEX", rho = 0.95)) expect_identical(droplevels(Dlog(wlddev2, 1, 1, ~iso3c, ~year, cols="LIFEEX", rho = 0.95)), Dlog(wlddev3, 1, 1, ~iso3c, ~year, cols="LIFEEX", rho = 0.95)) expect_identical(droplevels(Dlog(wlddev2, -1:1, 1:2, ~iso3c, ~year, cols="LIFEEX", rho = 0.95)), Dlog(wlddev3, -1:1, 1:2, ~iso3c, ~year, cols="LIFEEX", rho = 0.95)) expect_identical(droplevels(G(wlddev2, 1, 1, ~iso3c, ~year, cols="LIFEEX")), G(wlddev3, 1, 1, ~iso3c, ~year, cols="LIFEEX")) expect_identical(droplevels(G(wlddev2, -1:1, 1:2, ~iso3c, ~year, cols="LIFEEX")), G(wlddev3, -1:1, 1:2, ~iso3c, ~year, cols="LIFEEX")) expect_identical(L(wlddev3, 1, LIFEEX~iso3c), L(wlddev3, 1, ~iso3c, cols="LIFEEX")) expect_identical(L(wlddev3, -1:1, LIFEEX~iso3c), L(wlddev3, -1:1, ~iso3c, cols="LIFEEX")) expect_identical(droplevels(L(wlddev2, 1, ~iso3c, cols="LIFEEX")), L(wlddev3, 1, ~iso3c, cols="LIFEEX")) expect_identical(droplevels(L(wlddev2, -1:1, ~iso3c, cols="LIFEEX")), L(wlddev3, -1:1, ~iso3c, cols="LIFEEX")) expect_identical(droplevels(D(wlddev2, 1, 1, ~iso3c, cols="LIFEEX")), D(wlddev3, 1, 1, ~iso3c, cols="LIFEEX")) expect_identical(droplevels(D(wlddev2, -1:1, 1:2, ~iso3c, cols="LIFEEX")), D(wlddev3, -1:1, 1:2, ~iso3c, cols="LIFEEX")) expect_identical(droplevels(Dlog(wlddev2, 1, 1, ~iso3c, cols="LIFEEX")), Dlog(wlddev3, 1, 1, ~iso3c, cols="LIFEEX")) expect_identical(droplevels(Dlog(wlddev2, -1:1, 1:2, ~iso3c, cols="LIFEEX")), Dlog(wlddev3, -1:1, 1:2, ~iso3c, cols="LIFEEX")) expect_identical(droplevels(D(wlddev2, 1, 1, ~iso3c, cols="LIFEEX", rho = 0.95)), D(wlddev3, 1, 1, ~iso3c, cols="LIFEEX", rho = 0.95)) expect_identical(droplevels(D(wlddev2, -1:1, 1:2, ~iso3c, cols="LIFEEX", rho = 0.95)), D(wlddev3, -1:1, 1:2, ~iso3c, cols="LIFEEX", rho = 0.95)) expect_identical(droplevels(Dlog(wlddev2, 1, 1, ~iso3c, cols="LIFEEX", rho = 0.95)), Dlog(wlddev3, 1, 1, ~iso3c, cols="LIFEEX", rho = 0.95)) expect_identical(droplevels(Dlog(wlddev2, -1:1, 1:2, ~iso3c, cols="LIFEEX", rho = 0.95)), Dlog(wlddev3, -1:1, 1:2, ~iso3c, cols="LIFEEX", rho = 0.95)) expect_identical(droplevels(G(wlddev2, 1, 1, ~iso3c, cols="LIFEEX")), G(wlddev3, 1, 1, ~iso3c, cols="LIFEEX")) expect_identical(droplevels(G(wlddev2, -1:1, 1:2, ~iso3c, cols="LIFEEX")), G(wlddev3, -1:1, 1:2, ~iso3c, cols="LIFEEX")) }) library(magrittr) test_that("Testing grouped_df methods", { gdf <- wlddev %>% fsubset(year > 1990, region, income, PCGDP:ODA) %>% fgroup_by(region, income) gdf[["wgt"]] <- round(abs(10*rnorm(fnrow(gdf))), 1) expect_visible(gdf %>% fmean) expect_visible(gdf %>% fmean(wgt)) expect_equal(gdf %>% fmean(wgt) %>% slt(-sum.wgt), gdf %>% fmean(wgt, keep.w = FALSE)) expect_visible(gdf %>% fmedian) expect_visible(gdf %>% fmedian(wgt)) expect_equal(gdf %>% fmedian(wgt) %>% slt(-sum.wgt), gdf %>% fmedian(wgt, keep.w = FALSE)) expect_visible(gdf %>% fnth) expect_visible(gdf %>% fnth(0.75)) expect_visible(gdf %>% fnth(0.75, wgt)) expect_equal(gdf %>% fnth(0.75, wgt) %>% slt(-sum.wgt), gdf %>% fnth(0.75, wgt, keep.w = FALSE)) expect_visible(gdf %>% fmode) expect_visible(gdf %>% fmode(wgt)) expect_equal(gdf %>% fmode(wgt) %>% slt(-sum.wgt), gdf %>% fmode(wgt, keep.w = FALSE)) expect_visible(gdf %>% fsum) expect_visible(gdf %>% fsum(wgt)) expect_equal(gdf %>% fsum(wgt) %>% slt(-sum.wgt), gdf %>% fsum(wgt, keep.w = FALSE)) expect_visible(gdf %>% fprod) expect_visible(gdf %>% fprod(wgt)) expect_equal(gdf %>% fprod(wgt) %>% slt(-prod.wgt), gdf %>% fprod(wgt, keep.w = FALSE)) expect_visible(gdf %>% fsd) expect_visible(gdf %>% fsd(wgt)) expect_equal(gdf %>% fsd(wgt) %>% slt(-sum.wgt), gdf %>% fsd(wgt, keep.w = FALSE)) expect_visible(gdf %>% fvar) expect_visible(gdf %>% fvar(wgt)) expect_equal(gdf %>% fvar(wgt) %>% slt(-sum.wgt), gdf %>% fvar(wgt, keep.w = FALSE)) expect_visible(gdf %>% fmin) expect_visible(gdf %>% fmax) expect_visible(gdf %>% ffirst) expect_visible(gdf %>% flast) expect_visible(gdf %>% fnobs) expect_visible(gdf %>% fndistinct) expect_visible(gdf %>% collapg) expect_visible(gdf %>% fmean(w = wgt)) # good? expect_equal(gdf %>% collapg(w = wgt) %>% slt(-wgt), gdf %>% collapg(w = wgt, keep.w = FALSE)) expect_visible(gdf %>% fscale) expect_visible(gdf %>% fscale(wgt)) expect_equal(gdf %>% fscale(wgt) %>% slt(-wgt), gdf %>% fscale(wgt, keep.w = FALSE)) expect_visible(gdf %>% STD) expect_visible(gdf %>% STD(wgt)) expect_equal(gdf %>% STD(wgt) %>% slt(-wgt), gdf %>% STD(wgt, keep.w = FALSE)) expect_equal(gdf %>% fscale, gdf %>% STD(stub = FALSE)) expect_visible(gdf %>% fbetween) expect_visible(gdf %>% fbetween(wgt)) expect_equal(gdf %>% fbetween(wgt) %>% slt(-wgt), gdf %>% fbetween(wgt, keep.w = FALSE)) expect_visible(gdf %>% B) expect_visible(gdf %>% B(wgt)) expect_equal(gdf %>% B(wgt) %>% slt(-wgt), gdf %>% B(wgt, keep.w = FALSE)) expect_equal(gdf %>% fbetween, gdf %>% B(stub = FALSE)) expect_visible(gdf %>% fwithin) expect_visible(gdf %>% fwithin(wgt)) expect_equal(gdf %>% fwithin(wgt) %>% slt(-wgt), gdf %>% fwithin(wgt, keep.w = FALSE)) expect_visible(gdf %>% W) expect_visible(gdf %>% W(wgt)) expect_equal(gdf %>% W(wgt) %>% slt(-wgt), gdf %>% W(wgt, keep.w = FALSE)) expect_equal(gdf %>% fwithin, gdf %>% W(stub = FALSE)) expect_visible(gdf %>% fcumsum) expect_visible(gdf %>% flag) expect_visible(gdf %>% L) expect_visible(gdf %>% F) expect_true(all_obj_equal(gdf %>% flag, gdf %>% L(stubs = FALSE), gdf %>% F(-1, stubs = FALSE))) expect_true(all_obj_equal(gdf %>% flag(-3:3), gdf %>% L(-3:3), gdf %>% F(3:-3))) expect_visible(gdf %>% fdiff) expect_visible(gdf %>% D) expect_true(all_obj_equal(gdf %>% fdiff, gdf %>% D(stubs = FALSE))) expect_equal(gdf %>% fdiff(-2:2, 1:2), gdf %>% D(-2:2, 1:2)) expect_visible(gdf %>% fdiff(rho = 0.95)) expect_visible(gdf %>% fdiff(-2:2, 1:2, rho = 0.95)) expect_visible(gdf %>% fdiff(log = TRUE)) expect_visible(gdf %>% fdiff(-2:2, 1:2, log = TRUE)) expect_visible(gdf %>% fdiff(log = TRUE, rho = 0.95)) expect_visible(gdf %>% fdiff(-2:2, 1:2, log = TRUE, rho = 0.95)) expect_visible(gdf %>% fgrowth) expect_visible(gdf %>% G) expect_true(all_obj_equal(gdf %>% fgrowth, gdf %>% G(stubs = FALSE))) expect_equal(gdf %>% fgrowth(-2:2, 1:2), gdf %>% G(-2:2, 1:2)) expect_visible(gdf %>% fgrowth(scale = 1)) expect_visible(gdf %>% fgrowth(-2:2, 1:2, scale = 1)) expect_visible(gdf %>% fgrowth(logdiff = TRUE)) expect_visible(gdf %>% fgrowth(-2:2, 1:2, logdiff = TRUE)) expect_visible(gdf %>% fgrowth(logdiff = TRUE, scale = 1)) expect_visible(gdf %>% fgrowth(-2:2, 1:2, logdiff = TRUE, scale = 1)) expect_equal(BY(gby(iris,Species), sum), BY(nv(gby(iris,Species)), sum)) }) # Also better not run on CRAN... test_that("0-length vectors give expected output", { funs <- .c(fsum, fprod, fmean, fmedian, fmin, fmax, fnth, fcumsum, fbetween, fwithin, fscale) for(i in funs) { FUN <- match.fun(i) if(i %!in% .c(fsum, fmin, fmax, fcumsum)) { expect_true(all_identical(FUN(numeric(0)), FUN(integer(0)), numeric(0))) } else { expect_identical(FUN(numeric(0)), numeric(0)) expect_identical(FUN(integer(0)), integer(0)) } } funs <- .c(fmode, ffirst, flast) for(i in funs) { FUN <- match.fun(i) expect_identical(FUN(numeric(0)), numeric(0)) expect_identical(FUN(integer(0)), integer(0)) expect_identical(FUN(character(0)), character(0)) expect_identical(FUN(logical(0)), logical(0)) expect_identical(FUN(factor(0)), factor(0)) } funs <- .c(fvar, fsd) for(i in funs) { FUN <- match.fun(i) expect_identical(FUN(numeric(0)), NA_real_) expect_identical(FUN(integer(0)), NA_real_) } funs <- .c(fnobs, fndistinct) for(i in funs) { FUN <- match.fun(i) expect_identical(FUN(numeric(0)), 0L) expect_identical(FUN(integer(0)), 0L) } funs <- .c(flag, fdiff, fgrowth) for(i in funs) { FUN <- match.fun(i) expect_error(FUN(numeric(0))) expect_error(FUN(integer(0))) } funs <- .c(groupid, seqid) for(i in funs) { FUN <- match.fun(i) expect_identical(FUN(numeric(0)), integer(0)) expect_identical(FUN(integer(0)), integer(0)) } expect_identical(varying(numeric(0)), FALSE) expect_identical(TRA(numeric(0), 1), numeric(0)) }) } X <- matrix(rnorm(1000), ncol = 10) g <- qG(sample.int(10, 100, TRUE)) gf <- as_factor_qG(g) funs <- grep("hd|log", c(.FAST_FUN, .OPERATOR_FUN), ignore.case = TRUE, invert = TRUE, value = TRUE) test_that("functions work on plain matrices", { for(i in funs) { expect_visible(match.fun(i)(X)) expect_visible(match.fun(i)(X, g = g)) expect_visible(match.fun(i)(X, g = gf)) expect_visible(match.fun(i)(X, g = g, use.g.names = FALSE)) expect_visible(match.fun(i)(X, g = gf, use.g.names = FALSE)) } }) Xl <- mctl(X) test_that("functions work on plain lists", { for(i in funs) { expect_visible(match.fun(i)(Xl)) expect_visible(match.fun(i)(Xl, g = g, by = g)) expect_visible(match.fun(i)(Xl, g = gf, by = gf)) expect_visible(match.fun(i)(X, g = g, by = g, use.g.names = FALSE)) expect_visible(match.fun(i)(X, g = gf, by = gf, use.g.names = FALSE)) } }) test_that("time series functions work inside lm", { expect_equal(unname(coef(lm(mpg ~ L(cyl, 0:2), mtcars))), unname(coef(lm(mpg ~ cyl + L(cyl, 1) + L(cyl, 2), mtcars)))) expect_equal(unname(coef(lm(mpg ~ F(cyl, 0:2), mtcars))), unname(coef(lm(mpg ~ cyl + F(cyl, 1) + F(cyl, 2), mtcars)))) expect_equal(unname(coef(lm(mpg ~ D(cyl, 0:2), mtcars))), unname(coef(lm(mpg ~ cyl + D(cyl, 1) + D(cyl, 2), mtcars)))) expect_equal(unname(coef(lm(mpg ~ G(cyl, 0:2), mtcars))), unname(coef(lm(mpg ~ cyl + G(cyl, 1) + G(cyl, 2), mtcars)))) expect_equal(unname(coef(lm(mpg ~ L(L(cyl, 0:2)), mtcars))), unname(coef(lm(mpg ~ L(cyl) + L(cyl, 2) + L(cyl, 3), mtcars)))) expect_equal(unname(coef(lm(mpg ~ L(F(cyl, 0:2)), mtcars))), unname(coef(lm(mpg ~ L(cyl) + cyl + F(cyl, 1), mtcars)))) expect_equal(unname(coef(lm(mpg ~ L(D(cyl, 0:2)), mtcars))), unname(coef(lm(mpg ~ L(cyl) + L(D(cyl)) + L(D(cyl, 2)), mtcars)))) expect_equal(unname(coef(lm(mpg ~ L(G(cyl, 0:2)), mtcars))), unname(coef(lm(mpg ~ L(cyl) + L(G(cyl)) + L(G(cyl, 2)), mtcars)))) }) test_that("functions using welfords method properly deal with zero weights", { for(g in list(NULL, rep(1L, 3))) { expect_equal(unattrib(fvar(x = c(2, 1, 0), g = g, w = c(1, 1, 0), na.rm = TRUE)), 0.5) expect_equal(unattrib(fvar(x = c(2, 1, 3), g = g, w = c(0, 1, 1), na.rm = FALSE)), 2) expect_equal(unattrib(fsd(x = c(2, 1, 0), g = g, w = c(1, 1, 0), na.rm = TRUE)), sqrt(0.5)) expect_equal(unattrib(fsd(x = c(2, 1, 3), g = g, w = c(0, 1, 1), na.rm = FALSE)), sqrt(2)) expect_equal(unattrib(fscale(x = c(2, 1, 0), g = g, w = c(1, 1, 0), na.rm = TRUE)), (c(2, 1, 0)-1.5)/sqrt(0.5)) expect_equal(unattrib(fscale(x = c(2, 1, 3), g = g, w = c(0, 1, 1), na.rm = FALSE)), (c(2, 1, 3)-2)/sqrt(2)) expect_equal(unattrib(qsu(x = c(2, 1, 0), g = g, w = c(1, 1, 0))), c(2, 1.5, sqrt(0.5), 1, 2)) expect_equal(unattrib(qsu(x = c(2, 1, 3), g = g, w = c(0, 1, 1))), c(2, 2, sqrt(2), 1, 3)) expect_equal(unattrib(qsu(x = c(2, 1, 0), g = g, w = c(1, 1, 0), higher = TRUE))[1:5], c(2, 1.5, sqrt(0.5), 1, 2)) expect_equal(unattrib(qsu(x = c(2, 1, 3), g = g, w = c(0, 1, 1), higher = TRUE))[1:5], c(2, 2, sqrt(2), 1, 3)) } }) options(warn = 1) collapse/tests/testthat/test-collap.R0000644000176200001440000013472314201327077017445 0ustar liggesuserscontext("collap") bsum <- base::sum bmean <- base::mean # rm(list = ls()) options(warn = -1) g <- GRP(wlddev, ~ country + decade) oa <- function(x) setAttrib(unattrib(x), attributes(x)[c("names", "row.names", "class")]) # Should use above, but sometimes still gives errors if(Sys.getenv("NCRAN") != "TRUE") oa <- function(x) setNames(unattrib(x), names(x)) Mode <- function(x, na.rm = FALSE) { if(na.rm) x <- x[!is.na(x)] ux <- unique(x) ux[which.max(tabulate(match(x, ux)))] } # TODO: What about other return options and weighted multi-function aggregation ? And what about grouped_df method.. test_that("collap performs as intended in simple uses", { expect_equal(collap(mtcars, mtcars$cyl, keep.by = FALSE), fmean(mtcars, mtcars$cyl, use.g.names = FALSE)) expect_equal(collap(mtcars, mtcars[2], keep.by = FALSE), fmean(mtcars, mtcars$cyl, use.g.names = FALSE)) expect_equal(collap(mtcars, ~cyl), fmean(mtcars, mtcars$cyl, use.g.names = FALSE)) expect_equal(collap(mtcars, ~cyl, keep.by = FALSE), fmean(mtcars[-2], mtcars$cyl, use.g.names = FALSE)) expect_equal(collap(iris, ~Species, keep.by = FALSE), fmean(iris[-5], iris$Species, use.g.names = FALSE)) expect_equal(collap(airquality, ~Month, keep.by = FALSE), fmean(airquality[-5], airquality$Month, use.g.names = FALSE)) expect_equal(oa(collap(wlddev, ~ country + decade, keep.col.order = FALSE)), oa(cbind(g$groups, fmean(get_vars(wlddev, c(4,9:13)), g, use.g.names = FALSE), fmode(get_vars(wlddev, c(2:3,6:8)), g, use.g.names = FALSE)))) expect_equal(oa(collap(wlddev, ~ country + decade)), oa(cbind(g$groups, fmean(get_vars(wlddev, c(4,9:13)), g, use.g.names = FALSE), fmode(get_vars(wlddev, c(2:3,6:8)), g, use.g.names = FALSE)))[order(c(1,5,4,9:13,2:3,6:8))]) expect_equal(oa(collap(wlddev, ~ country + decade, keep.col.order = FALSE, keep.by = FALSE)), oa(cbind(fmean(get_vars(wlddev, c(4,9:13)), g, use.g.names = FALSE), fmode(get_vars(wlddev, c(2:3,6:8)), g, use.g.names = FALSE)))) expect_equal(oa(collap(wlddev, ~ country + decade, keep.by = FALSE)), oa(cbind(fmean(get_vars(wlddev, c(4,9:13)), g, use.g.names = FALSE), fmode(get_vars(wlddev, c(2:3,6:8)), g, use.g.names = FALSE)))[order(c(4,9:13,2:3,6:8))]) expect_equal(oa(collap(wlddev, g, keep.by = FALSE)), oa(cbind(g$groups, fmean(get_vars(wlddev, c(4,9:13)), g, use.g.names = FALSE), fmode(get_vars(wlddev, c(2:3,6:8)), g, use.g.names = FALSE)))[order(c(1,5,4,9:13,2:3,6:8))]) expect_equal(oa(collap(wlddev, wlddev[c("country","decade")], keep.by = FALSE)), oa(cbind(g$groups, fmean(get_vars(wlddev, c(4,9:13)), g, use.g.names = FALSE), fmode(get_vars(wlddev, c(2:3,6:8)), g, use.g.names = FALSE)))[order(c(1,5,4,9:13,2:3,6:8))]) }) test_that("collap preserves data attributes", { expect_identical(lapply(collap(wlddev, ~country), attributes), lapply(wlddev, attributes)) expect_identical(vclasses(collap(wlddev, ~country, fmin)), vclasses(wlddev)) expect_identical(vtypes(collap(wlddev, ~country, fmax)), vtypes(wlddev)) expect_identical(lapply(collap(wlddev, ~iso3c), attributes), lapply(wlddev, attributes)) expect_identical(vclasses(collap(wlddev, ~iso3c, fmin)), vclasses(wlddev)) expect_identical(vtypes(collap(wlddev, ~iso3c, fmax)), vtypes(wlddev)) expect_identical(lapply(collap(wlddev, ~date), attributes), lapply(wlddev, attributes)) expect_identical(vclasses(collap(wlddev, ~date, fmin)), vclasses(wlddev)) expect_identical(vtypes(collap(wlddev, ~date, fmax)), vtypes(wlddev)) expect_identical(lapply(collap(wlddev, ~country + decade), attributes), lapply(wlddev, attributes)) expect_identical(vclasses(collap(wlddev, ~country + decade, fmin)), vclasses(wlddev)) expect_identical(vtypes(collap(wlddev, ~country + decade, fmax)), vtypes(wlddev)) }) # if(Sys.getenv("NCRAN") == "TRUE") test_that("collap performs as intended in simple uses with base/stats functions", { expect_equal(oa(collap(mtcars, mtcars$cyl, bsum, keep.by = FALSE)), oa(fsum(mtcars, mtcars$cyl, use.g.names = FALSE))) expect_equal(oa(collap(mtcars, ~cyl, mean.default)), oa(fmean(mtcars, mtcars$cyl, use.g.names = FALSE))) expect_equal(oa(collap(mtcars, ~cyl, bmean)), oa(fmean(mtcars, mtcars$cyl, use.g.names = FALSE))) expect_equal(oa(collap(mtcars, mtcars[2], bsum, keep.by = FALSE)), oa(fsum(mtcars, mtcars$cyl, use.g.names = FALSE))) expect_equal(oa(collap(mtcars, ~cyl, bsum, keep.by = FALSE)), oa(fsum(mtcars[-2], mtcars$cyl, use.g.names = FALSE))) expect_equal(unattrib(collap(iris, ~Species, bsum, keep.by = FALSE)), unattrib(fsum(iris[-5], iris$Species, use.g.names = FALSE))) expect_equal(oa(collap(airquality, ~Month, bsum, na.rm = TRUE, keep.by = FALSE)), oa(fsum(airquality[-5], airquality$Month, use.g.names = FALSE))) expect_equal(oa(collap(wlddev, ~ country + decade, bsum, Mode, na.rm = TRUE, keep.col.order = FALSE)), oa(cbind(g$groups, BY(get_vars(wlddev, c(4,9:13)), g, bsum, na.rm = TRUE, use.g.names = FALSE), BY(get_vars(wlddev, c(2:3,6:8)), g, Mode, na.rm = TRUE, use.g.names = FALSE)))) expect_equal(oa(collap(wlddev, ~ country + decade, bsum, Mode, na.rm = TRUE)), oa(cbind(g$groups, BY(get_vars(wlddev, c(4,9:13)), g, bsum, na.rm = TRUE, use.g.names = FALSE), BY(get_vars(wlddev, c(2:3,6:8)), g, Mode, na.rm = TRUE, use.g.names = FALSE)))[order(c(1,5,4,9:13,2:3,6:8))]) }) test_that("collap using 2-sided formula or cols performs as intended", { expect_equal(oa(collap(mtcars, mpg ~ cyl, keep.by = FALSE)), oa(fmean(mtcars["mpg"], mtcars$cyl, use.g.names = FALSE))) expect_equal(oa(collap(mtcars, mpg ~ cyl, keep.by = FALSE, cols = 300:1000)), oa(fmean(mtcars["mpg"], mtcars$cyl, use.g.names = FALSE))) # cols is ignored, as should be expect_equal(oa(collap(mtcars, ~ cyl, keep.by = FALSE, cols = 1)), oa(fmean(mtcars["mpg"], mtcars$cyl, use.g.names = FALSE))) expect_equal(oa(collap(mtcars, wt + mpg ~ cyl + vs + am, keep.by = FALSE)), oa(fmean(mtcars[c("mpg","wt")], mtcars[c("cyl","vs","am")], use.g.names = FALSE))) expect_equal(oa(collap(mtcars, ~ cyl + vs + am, keep.by = FALSE, cols = c(6,1))), oa(fmean(mtcars[c("mpg","wt")], mtcars[c("cyl","vs","am")], use.g.names = FALSE))) expect_equal(oa(collap(iris, Sepal.Length + Sepal.Width ~ Species, keep.by = FALSE)), oa(fmean(iris[1:2], iris$Species, use.g.names = FALSE))) expect_equal(oa(collap(airquality, ~ Month, keep.by = FALSE)), oa(fmean(airquality[-5], airquality$Month, use.g.names = FALSE))) expect_equal(oa(collap(airquality, ~ Month, keep.by = FALSE, cols = 1:3)), oa(fmean(airquality[1:3], airquality$Month, use.g.names = FALSE))) expect_equal(oa(collap(wlddev, ~ country + decade, cols = 9:13)), oa(collap(wlddev, PCGDP + LIFEEX + GINI + ODA + POP ~ country + decade))) expect_equal(oa(collap(wlddev, ~ country + decade, cols = 9:13)), oa(collap(wlddev, ~ country + decade, cols = 9:13, keep.col.order = FALSE))) expect_equal(oa(collap(wlddev, ~ country + decade, cols = c(2:3,6:8))), oa(collap(wlddev, iso3c + date + region + income + OECD ~ country + decade))) expect_false(identical(oa(collap(wlddev, ~ country + decade, cols = c(2:3,6:8))), oa(collap(wlddev, ~ country + decade, cols = c(2:3,6:8), keep.col.order = FALSE)))) expect_equal(oa(collap(wlddev, ~ country + decade, cols = 9:12, keep.by = FALSE)), oa(collap(wlddev, PCGDP + LIFEEX + GINI + ODA ~ country + decade, keep.by = FALSE))) expect_equal(oa(collap(wlddev, ~ country + decade, cols = 9:13, keep.by = FALSE)), oa(collap(wlddev, ~ country + decade, cols = 9:13, keep.col.order = FALSE, keep.by = FALSE))) expect_equal(oa(collap(wlddev, ~ country + decade, cols = c(2:3,6:8), keep.by = FALSE)), oa(collap(wlddev, iso3c + date + region + income + OECD ~ country + decade, keep.by = FALSE))) expect_equal(oa(collap(wlddev, ~ country + decade, cols = c(2:3,6:8), keep.by = FALSE)), oa(collap(wlddev, ~ country + decade, cols = c(2:3,6:8), keep.col.order = FALSE, keep.by = FALSE))) expect_equal(oa(collap(wlddev, g, cols = 9:12, keep.by = FALSE)), oa(collap(wlddev, PCGDP + LIFEEX + GINI + ODA ~ country + decade, keep.by = FALSE))) expect_equal(oa(collap(wlddev, g, cols = 9:13, keep.by = FALSE)), oa(collap(wlddev, ~ country + decade, cols = 9:13, keep.col.order = FALSE, keep.by = FALSE))) expect_equal(oa(collap(wlddev, g, cols = c(2:3,6:8), keep.by = FALSE)), oa(collap(wlddev, iso3c + date + region + income + OECD ~ country + decade, keep.by = FALSE))) expect_equal(oa(collap(wlddev, g, cols = c(2:3,6:8), keep.by = FALSE)), oa(collap(wlddev, ~ country + decade, cols = c(2:3,6:8), keep.col.order = FALSE, keep.by = FALSE))) expect_equal(oa(collap(wlddev, wlddev[c("country","decade")], cols = 9:12, keep.by = FALSE)), oa(collap(wlddev, PCGDP + LIFEEX + GINI + ODA ~ country + decade, keep.by = FALSE))) expect_equal(oa(collap(wlddev, wlddev[c("country","decade")], cols = 9:13, keep.by = FALSE)), oa(collap(wlddev, ~ country + decade, cols = 9:13, keep.col.order = FALSE, keep.by = FALSE))) expect_equal(oa(collap(wlddev, wlddev[c("country","decade")], cols = c(2:3,6:8), keep.by = FALSE)), oa(collap(wlddev, iso3c + date + region + income + OECD ~ country + decade, keep.by = FALSE))) expect_equal(oa(collap(wlddev, wlddev[c("country","decade")], cols = c(2:3,6:8), keep.by = FALSE)), oa(collap(wlddev, ~ country + decade, cols = c(2:3,6:8), keep.col.order = FALSE, keep.by = FALSE))) }) test_that("collap multi-function aggreagtion performs as intended", { expect_equal(oa(collap(wlddev, ~ country + decade, list(fmean, fmedian), keep.col.order = FALSE, give.names = FALSE)), oa(cbind(g$groups, fmean(get_vars(wlddev, c(4,9:13)), g, use.g.names = FALSE), fmedian(get_vars(wlddev, c(4,9:13)), g, use.g.names = FALSE), fmode(get_vars(wlddev, c(2:3,6:8)), g, use.g.names = FALSE)))) if(Sys.getenv("NCRAN") == "TRUE") expect_equal(oa(collap(wlddev, ~ country + decade, list(fmean, fmedian), list(fmode, flast), keep.col.order = FALSE, give.names = FALSE)), oa(cbind(g$groups, fmean(get_vars(wlddev, c(4,9:13)), g, use.g.names = FALSE), fmedian(get_vars(wlddev, c(4,9:13)), g, use.g.names = FALSE), fmode(get_vars(wlddev, c(2:3,6:8)), g, use.g.names = FALSE), flast(get_vars(wlddev, c(2:3,6:8)), g, use.g.names = FALSE)))) # with column ordering: expect_equal(unname(oa(collap(wlddev, ~ country + decade, list(fmean, fmedian)))), unname(oa(cbind(g$groups, fmean(get_vars(wlddev, c(4,9:13)), g, use.g.names = FALSE), fmedian(get_vars(wlddev, c(4,9:13)), g, use.g.names = FALSE), fmode(get_vars(wlddev, c(2:3,6:8)), g, use.g.names = FALSE))[order(c(1,5,4,9:13,4,9:13,2:3,6:8))]))) expect_equal(unname(oa(collap(wlddev, ~ country + decade, list(fmean, fmedian), list(fmode, flast)))), unname(oa(cbind(g$groups, fmean(get_vars(wlddev, c(4,9:13)), g, use.g.names = FALSE), fmedian(get_vars(wlddev, c(4,9:13)), g, use.g.names = FALSE), fmode(get_vars(wlddev, c(2:3,6:8)), g, use.g.names = FALSE), flast(get_vars(wlddev, c(2:3,6:8)), g, use.g.names = FALSE)))[order(c(1,5,4,9:13,4,9:13,2:3,6:8,2:3,6:8))])) }) test_that("collap custom aggreagtion performs as intended", { expect_equal(unname(oa(collap(wlddev, ~ country + decade, custom = list(fmean = 9:13, fsd = 9:10, fmode = 7:8), keep.col.order = FALSE))), unname(oa(cbind(g$groups, fmean(wlddev[9:13], g, use.g.names = FALSE), fsd(wlddev[9:10], g, use.g.names = FALSE), fmode(wlddev[7:8], g, use.g.names = FALSE))))) expect_equal(unname(oa(collap(wlddev, ~ country + decade, custom = list(fmean = 9:13, fsd = 9:10, fmode = 7:8)))), unname(oa(cbind(g$groups, fmean(wlddev[9:13], g, use.g.names = FALSE), fsd(wlddev[9:10], g, use.g.names = FALSE), fmode(wlddev[7:8], g, use.g.names = FALSE)))[order(c(1,5,9:13,9:10,7:8))])) expect_equal(oa(collap(wlddev, ~ country + decade, custom = list(fmean = 9:13, fsd = 9:10, fmode = 7:8))), oa(collap(wlddev, ~ country + decade, custom = list(fmean = 9:13, fsd = c("PCGDP","LIFEEX"), fmode = 7:8)))) expect_equal(oa(collap(wlddev, ~ country + decade, custom = list(fmean = "PCGDP", fsd = c("LIFEEX","GINI"), flast = "date"))), oa(collap(wlddev, ~ country + decade, custom = list(fmean = "PCGDP", fsd = 10:11, flast = "date")))) expect_equal(oa(collap(wlddev, g, custom = list(fmean = 9:13, fsd = 9:10, fmode = 7:8))), oa(collap(wlddev, ~ country + decade, custom = list(fmean = 9:13, fsd = c("PCGDP","LIFEEX"), fmode = 7:8)))) expect_equal(oa(collap(wlddev, g, custom = list(fmean = "PCGDP", fsd = c("LIFEEX","GINI"), flast = "date"))), oa(collap(wlddev, g, custom = list(fmean = "PCGDP", fsd = 10:11, flast = "date")))) expect_equal(names(collap(wlddev, g, custom = list(fmean = c(GDP = "PCGDP"), fsd = c("LIFEEX", GN = "GINI"), flast = "date"), keep.by = FALSE, keep.col.order = FALSE)), .c(GDP, LIFEEX, GN, date)) }) test_that("collap weighted aggregations work as intended", { # Not keeping order ... expect_equal(oa(collap(wlddev, ~ country + decade, w = ~ POP, keep.col.order = FALSE)), oa(add_vars(g$groups, fsum(get_vars(wlddev, "POP"), g, use.g.names = FALSE), fmean(get_vars(wlddev, c("year","PCGDP","LIFEEX","GINI","ODA")), g, wlddev$POP, use.g.names = FALSE), fmode(get_vars(wlddev, c("iso3c","date","region","income", "OECD")), g, wlddev$POP, use.g.names = FALSE)))) expect_equal(oa(collap(wlddev, ~ country + decade, w = ~ POP, keep.col.order = FALSE, keep.by = FALSE)), oa(add_vars(fsum(get_vars(wlddev, "POP"), g, use.g.names = FALSE), fmean(get_vars(wlddev, c("year","PCGDP","LIFEEX","GINI","ODA")), g, wlddev$POP, use.g.names = FALSE), fmode(get_vars(wlddev, c("iso3c","date","region","income", "OECD")), g, wlddev$POP, use.g.names = FALSE)))) expect_equal(oa(collap(wlddev, ~ country + decade, w = ~ POP, keep.col.order = FALSE, keep.w = FALSE)), oa(add_vars(g$groups, fmean(get_vars(wlddev, c("year","PCGDP","LIFEEX","GINI","ODA")), g, wlddev$POP, use.g.names = FALSE), fmode(get_vars(wlddev, c("iso3c","date","region","income", "OECD")), g, wlddev$POP, use.g.names = FALSE)))) expect_equal(oa(collap(wlddev, ~ country + decade, w = ~ POP, keep.col.order = FALSE, keep.by = FALSE, keep.w = FALSE)), oa(add_vars(fmean(get_vars(wlddev, c("year","PCGDP","LIFEEX","GINI","ODA")), g, wlddev$POP, use.g.names = FALSE), fmode(get_vars(wlddev, c("iso3c","date","region","income", "OECD")), g, wlddev$POP, use.g.names = FALSE)))) # keeping order ... expect_equal(oa(collap(wlddev, ~ country + decade, w = ~ POP)), oa(add_vars(g$groups, fsum(get_vars(wlddev, "POP"), g, use.g.names = FALSE), fmean(get_vars(wlddev, c("year","PCGDP","LIFEEX","GINI","ODA")), g, wlddev$POP, use.g.names = FALSE), fmode(get_vars(wlddev, c("iso3c","date","region","income", "OECD")), g, wlddev$POP, use.g.names = FALSE)))[names(wlddev)]) expect_equal(oa(collap(wlddev, ~ country + decade, w = ~ POP, keep.by = FALSE)), oa(add_vars(fsum(get_vars(wlddev, "POP"), g, use.g.names = FALSE), fmean(get_vars(wlddev, c("year","PCGDP","LIFEEX","GINI","ODA")), g, wlddev$POP, use.g.names = FALSE), fmode(get_vars(wlddev, c("iso3c","date","region","income", "OECD")), g, wlddev$POP, use.g.names = FALSE)))[setdiff(names(wlddev), g$group.vars)]) expect_equal(oa(collap(wlddev, ~ country + decade, w = ~ POP, keep.w = FALSE)), oa(add_vars(g$groups, fmean(get_vars(wlddev, c("year","PCGDP","LIFEEX","GINI","ODA")), g, wlddev$POP, use.g.names = FALSE), fmode(get_vars(wlddev, c("iso3c","date","region","income", "OECD")), g, wlddev$POP, use.g.names = FALSE)))[setdiff(names(wlddev), "POP")]) expect_equal(oa(collap(wlddev, ~ country + decade, w = ~ POP, keep.by = FALSE, keep.w = FALSE)), oa(add_vars(fmean(get_vars(wlddev, c("year","PCGDP","LIFEEX","GINI","ODA")), g, wlddev$POP, use.g.names = FALSE), fmode(get_vars(wlddev, c("iso3c","date","region","income", "OECD")), g, wlddev$POP, use.g.names = FALSE)))[setdiff(names(wlddev), c(g$group.vars, "POP"))]) }) if(Sys.getenv("NCRAN") == "TRUE") test_that("collap multi-function aggreagtion with weights performs as intended", { expect_equal(oa(collap(wlddev, ~ country + decade, list(fmean, fsd), w = ~ POP, keep.col.order = FALSE, give.names = FALSE)), oa(cbind(g$groups, fsum(get_vars(wlddev, 13), g, use.g.names = FALSE), fmean(get_vars(wlddev, c(4,9:12)), g, wlddev$POP, use.g.names = FALSE), fsd(get_vars(wlddev, c(4,9:12)), g, wlddev$POP, use.g.names = FALSE), fmode(get_vars(wlddev, c(2:3,6:8)), g, wlddev$POP, use.g.names = FALSE)))) expect_equal(oa(collap(wlddev, ~ country + decade, list(fmean, fsd), list(fmode, flast), w = ~ POP, keep.col.order = FALSE, give.names = FALSE)), oa(cbind(g$groups, fsum(get_vars(wlddev, 13), g, use.g.names = FALSE), fmean(get_vars(wlddev, c(4,9:12)), g, wlddev$POP, use.g.names = FALSE), fsd(get_vars(wlddev, c(4,9:12)), g, wlddev$POP, use.g.names = FALSE), fmode(get_vars(wlddev, c(2:3,6:8)), g, wlddev$POP, use.g.names = FALSE), flast(get_vars(wlddev, c(2:3,6:8)), g, use.g.names = FALSE)))) # with column ordering: expect_equal(unname(oa(collap(wlddev, ~ country + decade, list(fmean, fsd), w = ~ POP, wFUN = list(fsum, fmax)))), unname(oa(cbind(g$groups, fsum(get_vars(wlddev, 13), g, use.g.names = FALSE), fmax(get_vars(wlddev, 13), g, use.g.names = FALSE), fmean(get_vars(wlddev, c(4,9:12)), g, wlddev$POP, use.g.names = FALSE), fsd(get_vars(wlddev, c(4,9:12)), g, wlddev$POP, use.g.names = FALSE), fmode(get_vars(wlddev, c(2:3,6:8)), g, wlddev$POP, use.g.names = FALSE)))[order(c(1,5,13,13,4,9:12,4,9:12,2:3,6:8))])) expect_equal(unname(oa(collap(wlddev, ~ country + decade, list(fmean, fsd), list(fmode, flast), w = ~ POP, wFUN = list(fsum, fmax)))), unname(oa(cbind(g$groups, fsum(get_vars(wlddev, 13), g, use.g.names = FALSE), fmax(get_vars(wlddev, 13), g, use.g.names = FALSE), fmean(get_vars(wlddev, c(4,9:12)), g, wlddev$POP, use.g.names = FALSE), fsd(get_vars(wlddev, c(4,9:12)), g, wlddev$POP, use.g.names = FALSE), fmode(get_vars(wlddev, c(2:3,6:8)), g, wlddev$POP, use.g.names = FALSE), flast(get_vars(wlddev, c(2:3,6:8)), g, use.g.names = FALSE)))[order(c(1,5,13,13,4,9:12,4,9:12,2:3,6:8,2:3,6:8))])) }) v1 <- c("year","PCGDP","LIFEEX","GINI","ODA") v2 <- c("iso3c","date","region","income", "OECD") test_that("collap weighted customized aggregation works as intended", { # Not keeping order ... expect_equal(oa(collap(wlddev, ~ country + decade, w = ~ POP, custom = list(fmean = v1, fmode = v2), keep.col.order = FALSE, give.names = FALSE)), oa(add_vars(g$groups, fsum(get_vars(wlddev, "POP"), g, use.g.names = FALSE), fmean(get_vars(wlddev, c("year","PCGDP","LIFEEX","GINI","ODA")), g, wlddev$POP, use.g.names = FALSE), fmode(get_vars(wlddev, c("iso3c","date","region","income", "OECD")), g, wlddev$POP, use.g.names = FALSE)))) expect_equal(unattrib(collap(wlddev, ~ country + decade, w = ~ POP, custom = list(fmean = v1, fmode = v2), keep.col.order = FALSE, keep.by = FALSE, give.names = FALSE)), unattrib(add_vars(fsum(get_vars(wlddev, "POP"), g, use.g.names = FALSE), fmean(get_vars(wlddev, c("year","PCGDP","LIFEEX","GINI","ODA")), g, wlddev$POP, use.g.names = FALSE), fmode(get_vars(wlddev, c("iso3c","date","region","income", "OECD")), g, wlddev$POP, use.g.names = FALSE)))) expect_equal(oa(collap(wlddev, ~ country + decade, w = ~ POP, custom = list(fmean = v1, fmode = v2), keep.col.order = FALSE, keep.w = FALSE, give.names = FALSE)), oa(add_vars(g$groups, fmean(get_vars(wlddev, c("year","PCGDP","LIFEEX","GINI","ODA")), g, wlddev$POP, use.g.names = FALSE), fmode(get_vars(wlddev, c("iso3c","date","region","income", "OECD")), g, wlddev$POP, use.g.names = FALSE)))) expect_equal(oa(collap(wlddev, ~ country + decade, w = ~ POP, custom = list(fmean = v1, fmode = v2), keep.col.order = FALSE, keep.by = FALSE, keep.w = FALSE, give.names = FALSE)), oa(add_vars(fmean(get_vars(wlddev, c("year","PCGDP","LIFEEX","GINI","ODA")), g, wlddev$POP, use.g.names = FALSE), fmode(get_vars(wlddev, c("iso3c","date","region","income", "OECD")), g, wlddev$POP, use.g.names = FALSE)))) # keeping order ... expect_equal(oa(collap(wlddev, ~ country + decade, w = ~ POP, custom = list(fmean = v1, fmode = v2), give.names = FALSE)), oa(add_vars(g$groups, fsum(get_vars(wlddev, "POP"), g, use.g.names = FALSE), fmean(get_vars(wlddev, c("year","PCGDP","LIFEEX","GINI","ODA")), g, wlddev$POP, use.g.names = FALSE), fmode(get_vars(wlddev, c("iso3c","date","region","income", "OECD")), g, wlddev$POP, use.g.names = FALSE)))[names(wlddev)]) expect_equal(oa(collap(wlddev, ~ country + decade, w = ~ POP, keep.by = FALSE, custom = list(fmean = v1, fmode = v2), give.names = FALSE)), oa(add_vars(fsum(get_vars(wlddev, "POP"), g, use.g.names = FALSE), fmean(get_vars(wlddev, c("year","PCGDP","LIFEEX","GINI","ODA")), g, wlddev$POP, use.g.names = FALSE), fmode(get_vars(wlddev, c("iso3c","date","region","income", "OECD")), g, wlddev$POP, use.g.names = FALSE)))[setdiff(names(wlddev), g$group.vars)]) expect_equal(oa(collap(wlddev, ~ country + decade, w = ~ POP, keep.w = FALSE, custom = list(fmean = v1, fmode = v2), give.names = FALSE)), oa(add_vars(g$groups, fmean(get_vars(wlddev, c("year","PCGDP","LIFEEX","GINI","ODA")), g, wlddev$POP, use.g.names = FALSE), fmode(get_vars(wlddev, c("iso3c","date","region","income", "OECD")), g, wlddev$POP, use.g.names = FALSE)))[setdiff(names(wlddev), "POP")]) expect_equal(oa(collap(wlddev, ~ country + decade, w = ~ POP, keep.by = FALSE, keep.w = FALSE, custom = list(fmean = v1, fmode = v2), give.names = FALSE)), oa(add_vars(fmean(get_vars(wlddev, c("year","PCGDP","LIFEEX","GINI","ODA")), g, wlddev$POP, use.g.names = FALSE), fmode(get_vars(wlddev, c("iso3c","date","region","income", "OECD")), g, wlddev$POP, use.g.names = FALSE)))[setdiff(names(wlddev), c(g$group.vars, "POP"))]) }) test_that("collap gives informative errors", { expect_error(collap(~cyl, ~cyl)) # nah, need to give error in qDF expect_error(collap(wlddev, 1:3)) # only gives error in fmean.. a bit late.. expect_error(collap(wlddev, "country")) # same thing expect_error(collap(wlddev, ~ country1)) expect_error(collap(wlddev, ~ country, w = ~bla)) expect_error(collap(wlddev, ~ country, w = ~POP, wFUN = bsum)) expect_error(collap(wlddev, ~ country + year + bla)) expect_error(collap(wlddev, bla ~ country)) expect_warning(collap(wlddev, ~ country, bla = 1)) # passes to fmean.data.frame which give the error. # expect_error(collap(wlddev, ~ country, bsum, cols = 9:13, bla = 1)) # This is an issue, bsum(1:3, bla = 1) does not give an error expect_error(collap(wlddev, mtcars$cyl)) # again fmean error.. expect_error(collap(wlddev, ~iso3c, cols = 9:14)) # expect_error(collap(wlddev, ~iso3c, cols = 0:1)) # no error.. expect_error(collap(wlddev, ~iso3c, cols = c("PCGDP","bla"))) expect_error(collap(wlddev, ~iso3c, cols = c("PCGDP","LIFEEX1"))) expect_error(collap(wlddev, ~iso3c, custom = ~ PCGDP)) expect_error(collap(wlddev, ~iso3c, custom = list(fmean, fmode))) expect_error(collap(wlddev, ~iso3c, custom = list(fmean = 9:14, fmode = 4:6))) expect_error(collap(wlddev, ~iso3c, custom = list(fmean = 9:13, 4:6))) expect_error(collap(wlddev, ~iso3c, custom = list(fmean = 9:13, fmode2 = 4:6))) expect_error(collap(wlddev, ~iso3c, custom = list(fmean = 9:13, fmode = c("GINI","bla")))) expect_error(collap(wlddev, ~iso3c, custom = list(fmean = 9:13, fmode = c("GINI","PCGDP2")))) }) # Note: one more thing to test is performance with vector-valued functions... # Testing collapv v <- c(1, 5) test_that("collapv performs as intended in simple uses", { expect_equal(oa(collapv(mtcars, 2)), oa(fmean(mtcars, mtcars$cyl, use.g.names = FALSE))) expect_equal(oa(collapv(mtcars, 2, keep.by = FALSE)), oa(fmean(mtcars[-2], mtcars$cyl, use.g.names = FALSE))) expect_equal(oa(collapv(iris, "Species", keep.by = FALSE)), oa(fmean(iris[-5], iris$Species, use.g.names = FALSE))) expect_equal(oa(collapv(airquality, "Month", keep.by = FALSE)), oa(fmean(airquality[-5], airquality$Month, use.g.names = FALSE))) expect_equal(oa(collapv(wlddev, v, keep.col.order = FALSE)), oa(cbind(g$groups, fmean(get_vars(wlddev, c(4,9:13)), g, use.g.names = FALSE), fmode(get_vars(wlddev, c(2:3,6:8)), g, use.g.names = FALSE)))) expect_equal(oa(collapv(wlddev, v)), oa(cbind(g$groups, fmean(get_vars(wlddev, c(4,9:13)), g, use.g.names = FALSE), fmode(get_vars(wlddev, c(2:3,6:8)), g, use.g.names = FALSE)))[order(c(1,5,4,9:13,2:3,6:8))]) expect_equal(oa(collapv(wlddev, v, keep.col.order = FALSE, keep.by = FALSE)), oa(cbind(fmean(get_vars(wlddev, c(4,9:13)), g, use.g.names = FALSE), fmode(get_vars(wlddev, c(2:3,6:8)), g, use.g.names = FALSE)))) expect_equal(oa(collapv(wlddev, v, keep.by = FALSE)), oa(cbind(fmean(get_vars(wlddev, c(4,9:13)), g, use.g.names = FALSE), fmode(get_vars(wlddev, c(2:3,6:8)), g, use.g.names = FALSE)))[order(c(4,9:13,2:3,6:8))]) expect_equal(names(collapv(wlddev, v, custom = list(fmean = c(GDP = "PCGDP"), fsd = c("LIFEEX", GN = "GINI"), flast = "date"), keep.by = FALSE, keep.col.order = FALSE)), .c(GDP, LIFEEX, GN, date)) }) test_that("collapv preserves data attributes", { expect_identical(lapply(collapv(wlddev, 1), attributes), lapply(wlddev, attributes)) expect_identical(vclasses(collapv(wlddev, 1, fmax)), vclasses(wlddev)) expect_identical(vtypes(collapv(wlddev, 1, fmin)), vtypes(wlddev)) expect_identical(lapply(collapv(wlddev, "iso3c"), attributes), lapply(wlddev, attributes)) expect_identical(vclasses(collapv(wlddev, "iso3c", fmax)), vclasses(wlddev)) expect_identical(vtypes(collapv(wlddev, "iso3c", fmin)), vtypes(wlddev)) expect_identical(lapply(collapv(wlddev, "date"), attributes), lapply(wlddev, attributes)) expect_identical(vclasses(collapv(wlddev, "date", ffirst)), vclasses(wlddev)) expect_identical(vtypes(collapv(wlddev, "date", flast)), vtypes(wlddev)) expect_identical(lapply(collapv(wlddev, v), attributes), lapply(wlddev, attributes)) expect_identical(vclasses(collapv(wlddev, v, flast)), vclasses(wlddev)) expect_identical(vtypes(collapv(wlddev, v, ffirst)), vtypes(wlddev)) }) # if(Sys.getenv("NCRAN") == "TRUE") test_that("collapv performs as intended in simple uses with base/stats functions", { expect_equal(oa(collapv(mtcars, "cyl", mean.default)), oa(fmean(mtcars, mtcars$cyl, use.g.names = FALSE))) expect_equal(oa(collapv(mtcars, "cyl", bmean)), oa(fmean(mtcars, mtcars$cyl, use.g.names = FALSE))) expect_equal(oa(collapv(mtcars, 2, bsum, keep.by = FALSE)), oa(fsum(mtcars[-2], mtcars$cyl, use.g.names = FALSE))) expect_equal(oa(collapv(iris, 5, bsum, keep.by = FALSE)), oa(fsum(iris[-5], iris$Species, use.g.names = FALSE))) expect_equal(oa(collapv(airquality, "Month", bsum, na.rm = TRUE, keep.by = FALSE)), oa(fsum(airquality[-5], airquality$Month, use.g.names = FALSE))) expect_equal(oa(collapv(wlddev, v, bsum, Mode, na.rm = TRUE, keep.col.order = FALSE)), oa(cbind(g$groups, BY(get_vars(wlddev, c(4,9:13)), g, bsum, na.rm = TRUE, use.g.names = FALSE), BY(get_vars(wlddev, c(2:3,6:8)), g, Mode, na.rm = TRUE, use.g.names = FALSE)))) expect_equal(oa(collapv(wlddev, v, bsum, Mode, na.rm = TRUE)), oa(cbind(g$groups, BY(get_vars(wlddev, c(4,9:13)), g, bsum, na.rm = TRUE, use.g.names = FALSE), BY(get_vars(wlddev, c(2:3,6:8)), g, Mode, na.rm = TRUE, use.g.names = FALSE)))[order(c(1,5,4,9:13,2:3,6:8))]) }) test_that("collapv using cols performs as intended", { expect_equal(oa(collapv(mtcars, 2, keep.by = FALSE, cols = 1)), oa(fmean(mtcars["mpg"], mtcars$cyl, use.g.names = FALSE))) expect_equal(oa(collapv(mtcars, c("cyl", "vs", "am"), keep.by = FALSE, cols = c(6,1))), oa(fmean(mtcars[c("mpg","wt")], mtcars[c("cyl","vs","am")], use.g.names = FALSE))) expect_equal(oa(collapv(airquality, "Month", keep.by = FALSE)), oa(fmean(airquality[-5], airquality$Month, use.g.names = FALSE))) expect_equal(oa(collapv(airquality, "Month", keep.by = FALSE, cols = 1:3)), oa(fmean(airquality[1:3], airquality$Month, use.g.names = FALSE))) expect_equal(oa(collapv(wlddev, v, cols = 9:12)), oa(collap(wlddev, PCGDP + LIFEEX + GINI + ODA ~ country + decade))) expect_equal(oa(collapv(wlddev, v, cols = 9:13)), oa(collap(wlddev, ~ country + decade, cols = 9:13, keep.col.order = FALSE))) expect_equal(oa(collapv(wlddev, v, cols = c(2:3,6:8))), oa(collap(wlddev, iso3c + date + region + income + OECD ~ country + decade))) expect_false(identical(collapv(wlddev, v, cols = c(2:3,6:8)), collap(wlddev, ~ country + decade, cols = c(2:3,6:8), keep.col.order = FALSE))) expect_equal(oa(collapv(wlddev, v, cols = 9:12, keep.by = FALSE)), oa(collap(wlddev, PCGDP + LIFEEX + GINI + ODA ~ country + decade, keep.by = FALSE))) expect_equal(oa(collapv(wlddev, v, cols = 9:13, keep.by = FALSE)), oa(collap(wlddev, ~ country + decade, cols = 9:13, keep.col.order = FALSE, keep.by = FALSE))) expect_equal(oa(collapv(wlddev, v, cols = c(2:3,6:8), keep.by = FALSE)), oa(collap(wlddev, iso3c + date + region + income + OECD ~ country + decade, keep.by = FALSE))) expect_equal(oa(collapv(wlddev, v, cols = c(2:3,6:8), keep.by = FALSE)), oa(collap(wlddev, ~ country + decade, cols = c(2:3,6:8), keep.col.order = FALSE, keep.by = FALSE))) }) test_that("collapv multi-function aggreagtion performs as intended", { expect_equal(oa(collapv(wlddev, v, list(fmean, fmedian), keep.col.order = FALSE, give.names = FALSE)), oa(cbind(g$groups, fmean(get_vars(wlddev, c(4,9:13)), g, use.g.names = FALSE), fmedian(get_vars(wlddev, c(4,9:13)), g, use.g.names = FALSE), fmode(get_vars(wlddev, c(2:3,6:8)), g, use.g.names = FALSE)))) expect_equal(oa(collapv(wlddev, v, list(fmean, fmedian), list(fmode, flast), keep.col.order = FALSE, give.names = FALSE)), oa(cbind(g$groups, fmean(get_vars(wlddev, c(4,9:13)), g, use.g.names = FALSE), fmedian(get_vars(wlddev, c(4,9:13)), g, use.g.names = FALSE), fmode(get_vars(wlddev, c(2:3,6:8)), g, use.g.names = FALSE), flast(get_vars(wlddev, c(2:3,6:8)), g, use.g.names = FALSE)))) # with column ordering: expect_equal(unname(oa(collapv(wlddev, v, list(fmean, fmedian)))), unname(oa(cbind(g$groups, fmean(get_vars(wlddev, c(4,9:13)), g, use.g.names = FALSE), fmedian(get_vars(wlddev, c(4,9:13)), g, use.g.names = FALSE), fmode(get_vars(wlddev, c(2:3,6:8)), g, use.g.names = FALSE)))[order(c(1,5,4,9:13,4,9:13,2:3,6:8))])) expect_equal(unname(oa(collapv(wlddev, v, list(fmean, fmedian), list(fmode, flast)))), unname(oa(cbind(g$groups, fmean(get_vars(wlddev, c(4,9:13)), g, use.g.names = FALSE), fmedian(get_vars(wlddev, c(4,9:13)), g, use.g.names = FALSE), fmode(get_vars(wlddev, c(2:3,6:8)), g, use.g.names = FALSE), flast(get_vars(wlddev, c(2:3,6:8)), g, use.g.names = FALSE)))[order(c(1,5,4,9:13,4,9:13,2:3,6:8,2:3,6:8))])) }) test_that("collapv custom aggreagtion performs as intended", { expect_equal(unname(oa(collapv(wlddev, v, custom = list(fmean = 9:13, fsd = 9:10, fmode = 7:8), keep.col.order = FALSE))), unname(oa(cbind(g$groups, fmean(wlddev[9:13], g, use.g.names = FALSE), fsd(wlddev[9:10], g, use.g.names = FALSE), fmode(wlddev[7:8], g, use.g.names = FALSE))))) expect_equal(unname(oa(collapv(wlddev, v, custom = list(fmean = 9:13, fsd = 9:10, fmode = 7:8)))), unname(oa(cbind(g$groups, fmean(wlddev[9:13], g, use.g.names = FALSE), fsd(wlddev[9:10], g, use.g.names = FALSE), fmode(wlddev[7:8], g, use.g.names = FALSE)))[order(c(1,5,9:13,9:10,7:8))])) expect_equal(oa(collapv(wlddev, v, custom = list(fmean = 9:13, fsd = 9:10, fmode = 7:8))), oa(collapv(wlddev, v, custom = list(fmean = 9:13, fsd = c("PCGDP","LIFEEX"), fmode = 7:8)))) expect_equal(oa(collapv(wlddev, v, custom = list(fmean = "PCGDP", fsd = c("LIFEEX","GINI"), flast = "date"))), oa(collapv(wlddev, v, custom = list(fmean = "PCGDP", fsd = 10:11, flast = "date")))) expect_equal(oa(collapv(wlddev, v, custom = list(fmean = 9:13, fsd = 9:10, fmode = 7:8))), oa(collapv(wlddev, v, custom = list(fmean = 9:13, fsd = c("PCGDP","LIFEEX"), fmode = 7:8)))) expect_equal(oa(collapv(wlddev, v, custom = list(fmean = "PCGDP", fsd = c("LIFEEX","GINI"), flast = "date"))), oa(collapv(wlddev, v, custom = list(fmean = "PCGDP", fsd = 10:11, flast = "date")))) }) test_that("collapv weighted aggregations work as intended", { # Not keeping order ... expect_equal(oa(collapv(wlddev, v, w = "POP", keep.col.order = FALSE)), oa(add_vars(g$groups, fsum(get_vars(wlddev, "POP"), g, use.g.names = FALSE), fmean(get_vars(wlddev, c("year","PCGDP","LIFEEX","GINI","ODA")), g, wlddev$POP, use.g.names = FALSE), fmode(get_vars(wlddev, c("iso3c","date","region","income", "OECD")), g, wlddev$POP, use.g.names = FALSE)))) expect_equal(oa(collapv(wlddev, v, w = "POP", keep.col.order = FALSE, keep.by = FALSE)), oa(add_vars(fsum(get_vars(wlddev, "POP"), g, use.g.names = FALSE), fmean(get_vars(wlddev, c("year","PCGDP","LIFEEX","GINI","ODA")), g, wlddev$POP, use.g.names = FALSE), fmode(get_vars(wlddev, c("iso3c","date","region","income", "OECD")), g, wlddev$POP, use.g.names = FALSE)))) expect_equal(oa(collapv(wlddev, v, w = "POP", keep.col.order = FALSE, keep.w = FALSE)), oa(add_vars(g$groups, fmean(get_vars(wlddev, c("year","PCGDP","LIFEEX","GINI","ODA")), g, wlddev$POP, use.g.names = FALSE), fmode(get_vars(wlddev, c("iso3c","date","region","income", "OECD")), g, wlddev$POP, use.g.names = FALSE)))) expect_equal(oa(collapv(wlddev, v, w = "POP", keep.col.order = FALSE, keep.by = FALSE, keep.w = FALSE)), oa(add_vars(fmean(get_vars(wlddev, c("year","PCGDP","LIFEEX","GINI","ODA")), g, wlddev$POP, use.g.names = FALSE), fmode(get_vars(wlddev, c("iso3c","date","region","income", "OECD")), g, wlddev$POP, use.g.names = FALSE)))) # keeping order ... expect_equal(oa(collapv(wlddev, v, w = "POP")), oa(add_vars(g$groups, fsum(get_vars(wlddev, "POP"), g, use.g.names = FALSE), fmean(get_vars(wlddev, c("year","PCGDP","LIFEEX","GINI","ODA")), g, wlddev$POP, use.g.names = FALSE), fmode(get_vars(wlddev, c("iso3c","date","region","income", "OECD")), g, wlddev$POP, use.g.names = FALSE)))[names(wlddev)]) expect_equal(unattrib(collapv(wlddev, v, w = "POP", keep.by = FALSE)), unattrib(add_vars(fsum(get_vars(wlddev, "POP"), g, use.g.names = FALSE), fmean(get_vars(wlddev, c("year","PCGDP","LIFEEX","GINI","ODA")), g, wlddev$POP, use.g.names = FALSE), fmode(get_vars(wlddev, c("iso3c","date","region","income", "OECD")), g, wlddev$POP, use.g.names = FALSE))[setdiff(names(wlddev), g$group.vars)])) expect_equal(oa(collapv(wlddev, v, w = "POP", keep.w = FALSE)), oa(add_vars(g$groups, fmean(get_vars(wlddev, c("year","PCGDP","LIFEEX","GINI","ODA")), g, wlddev$POP, use.g.names = FALSE), fmode(get_vars(wlddev, c("iso3c","date","region","income", "OECD")), g, wlddev$POP, use.g.names = FALSE)))[setdiff(names(wlddev), "POP")]) expect_equal(oa(collapv(wlddev, v, w = "POP", keep.by = FALSE, keep.w = FALSE)), oa(add_vars(fmean(get_vars(wlddev, c("year","PCGDP","LIFEEX","GINI","ODA")), g, wlddev$POP, use.g.names = FALSE), fmode(get_vars(wlddev, c("iso3c","date","region","income", "OECD")), g, wlddev$POP, use.g.names = FALSE)))[setdiff(names(wlddev), c(g$group.vars, "POP"))]) }) if(Sys.getenv("NCRAN") == "TRUE") test_that("collapv multi-function aggreagtion with weights performs as intended", { expect_equal(oa(collapv(wlddev, v, list(fmean, fsd), w = "POP", keep.col.order = FALSE, give.names = FALSE)), oa(cbind(g$groups, fsum(get_vars(wlddev, 13), g, use.g.names = FALSE), fmean(get_vars(wlddev, c(4,9:12)), g, wlddev$POP, use.g.names = FALSE), fsd(get_vars(wlddev, c(4,9:12)), g, wlddev$POP, use.g.names = FALSE), fmode(get_vars(wlddev, c(2:3,6:8)), g, wlddev$POP, use.g.names = FALSE)))) expect_equal(oa(collapv(wlddev, v, list(fmean, fsd), list(fmode, flast), w = "POP", keep.col.order = FALSE, give.names = FALSE)), oa(cbind(g$groups, fsum(get_vars(wlddev, 13), g, use.g.names = FALSE), fmean(get_vars(wlddev, c(4,9:12)), g, wlddev$POP, use.g.names = FALSE), fsd(get_vars(wlddev, c(4,9:12)), g, wlddev$POP, use.g.names = FALSE), fmode(get_vars(wlddev, c(2:3,6:8)), g, wlddev$POP, use.g.names = FALSE), flast(get_vars(wlddev, c(2:3,6:8)), g, use.g.names = FALSE)))) # with column ordering: expect_equal(unname(oa(collapv(wlddev, v, list(fmean, fsd), w = "POP", wFUN = list(fsum, fmax)))), unname(oa(cbind(g$groups, fsum(get_vars(wlddev, 13), g, use.g.names = FALSE), fmax(get_vars(wlddev, 13), g, use.g.names = FALSE), fmean(get_vars(wlddev, c(4,9:12)), g, wlddev$POP, use.g.names = FALSE), fsd(get_vars(wlddev, c(4,9:12)), g, wlddev$POP, use.g.names = FALSE), fmode(get_vars(wlddev, c(2:3,6:8)), g, wlddev$POP, use.g.names = FALSE)))[order(c(1,5,13,13,4,9:12,4,9:12,2:3,6:8))])) expect_equal(unattrib(collapv(wlddev, v, list(fmean, fsd), list(fmode, flast), w = "POP", wFUN = list(fsum, fmax))), unattrib(cbind(g$groups, fsum(get_vars(wlddev, 13), g, use.g.names = FALSE), fmax(get_vars(wlddev, 13), g, use.g.names = FALSE), fmean(get_vars(wlddev, c(4,9:12)), g, wlddev$POP, use.g.names = FALSE), fsd(get_vars(wlddev, c(4,9:12)), g, wlddev$POP, use.g.names = FALSE), fmode(get_vars(wlddev, c(2:3,6:8)), g, wlddev$POP, use.g.names = FALSE), flast(get_vars(wlddev, c(2:3,6:8)), g, use.g.names = FALSE)))[order(c(1,5,13,13,4,9:12,4,9:12,2:3,6:8,2:3,6:8))]) }) v1 <- c("year","PCGDP","LIFEEX","GINI","ODA") v2 <- c("iso3c","date","region","income", "OECD") test_that("collapv weighted customized aggregation works as intended", { # Not keeping order ... expect_equal(oa(collapv(wlddev, v, w = "POP", custom = list(fmean = v1, fmode = v2), keep.col.order = FALSE, give.names = FALSE)), oa(add_vars(g$groups, fsum(get_vars(wlddev, "POP"), g, use.g.names = FALSE), fmean(get_vars(wlddev, c("year","PCGDP","LIFEEX","GINI","ODA")), g, wlddev$POP, use.g.names = FALSE), fmode(get_vars(wlddev, c("iso3c","date","region","income", "OECD")), g, wlddev$POP, use.g.names = FALSE)))) expect_equal(unattrib(collapv(wlddev, v, w = "POP", custom = list(fmean = v1, fmode = v2), keep.col.order = FALSE, keep.by = FALSE, give.names = FALSE)), unattrib(add_vars(fsum(get_vars(wlddev, "POP"), g, use.g.names = FALSE), fmean(get_vars(wlddev, c("year","PCGDP","LIFEEX","GINI","ODA")), g, wlddev$POP, use.g.names = FALSE), fmode(get_vars(wlddev, c("iso3c","date","region","income", "OECD")), g, wlddev$POP, use.g.names = FALSE)))) expect_equal(oa(collapv(wlddev, v, w = "POP", custom = list(fmean = v1, fmode = v2), keep.col.order = FALSE, keep.w = FALSE, give.names = FALSE)), oa(add_vars(g$groups, fmean(get_vars(wlddev, c("year","PCGDP","LIFEEX","GINI","ODA")), g, wlddev$POP, use.g.names = FALSE), fmode(get_vars(wlddev, c("iso3c","date","region","income", "OECD")), g, wlddev$POP, use.g.names = FALSE)))) expect_equal(oa(collapv(wlddev, v, w = "POP", custom = list(fmean = v1, fmode = v2), keep.col.order = FALSE, keep.by = FALSE, keep.w = FALSE, give.names = FALSE)), oa(add_vars(fmean(get_vars(wlddev, c("year","PCGDP","LIFEEX","GINI","ODA")), g, wlddev$POP, use.g.names = FALSE), fmode(get_vars(wlddev, c("iso3c","date","region","income", "OECD")), g, wlddev$POP, use.g.names = FALSE)))) # keeping order ... expect_equal(oa(collapv(wlddev, v, w = "POP", custom = list(fmean = v1, fmode = v2), give.names = FALSE)), oa(add_vars(g$groups, fsum(get_vars(wlddev, "POP"), g, use.g.names = FALSE), fmean(get_vars(wlddev, c("year","PCGDP","LIFEEX","GINI","ODA")), g, wlddev$POP, use.g.names = FALSE), fmode(get_vars(wlddev, c("iso3c","date","region","income", "OECD")), g, wlddev$POP, use.g.names = FALSE)))[names(wlddev)]) expect_equal(oa(collapv(wlddev, v, w = "POP", keep.by = FALSE, custom = list(fmean = v1, fmode = v2), give.names = FALSE)), oa(add_vars(fsum(get_vars(wlddev, "POP"), g, use.g.names = FALSE), fmean(get_vars(wlddev, c("year","PCGDP","LIFEEX","GINI","ODA")), g, wlddev$POP, use.g.names = FALSE), fmode(get_vars(wlddev, c("iso3c","date","region","income", "OECD")), g, wlddev$POP, use.g.names = FALSE)))[setdiff(names(wlddev), g$group.vars)]) expect_equal(oa(collapv(wlddev, v, w = "POP", keep.w = FALSE, custom = list(fmean = v1, fmode = v2), give.names = FALSE)), oa(add_vars(g$groups, fmean(get_vars(wlddev, c("year","PCGDP","LIFEEX","GINI","ODA")), g, wlddev$POP, use.g.names = FALSE), fmode(get_vars(wlddev, c("iso3c","date","region","income", "OECD")), g, wlddev$POP, use.g.names = FALSE)))[setdiff(names(wlddev), "POP")]) expect_equal(oa(collapv(wlddev, v, w = "POP", keep.by = FALSE, keep.w = FALSE, custom = list(fmean = v1, fmode = v2), give.names = FALSE)), oa(add_vars(fmean(get_vars(wlddev, c("year","PCGDP","LIFEEX","GINI","ODA")), g, wlddev$POP, use.g.names = FALSE), fmode(get_vars(wlddev, c("iso3c","date","region","income", "OECD")), g, wlddev$POP, use.g.names = FALSE)))[setdiff(names(wlddev), c(g$group.vars, "POP"))]) }) test_that("collapv gives informative errors", { expect_error(collapv(~cyl, ~cyl)) # nah, need to give error in qDF expect_error(collapv(wlddev, ~ country)) # same thing expect_error(collapv(wlddev, 14)) expect_error(collapv(wlddev, 1, w = 14)) expect_error(collapv(wlddev, 1, w = "bla")) expect_error(collapv(wlddev, 1, w = 13, wFUN = bsum)) expect_error(collapv(wlddev, c(1,0))) expect_error(collapv(wlddev, c(1,14))) expect_warning(collapv(wlddev, 1, bla = 1)) # passes to fmean.data.frame which give the error. expect_error(collapv(wlddev, 2, cols = 9:14)) expect_error(collapv(wlddev, 2, cols = c("PCGDP","bla"))) expect_error(collapv(wlddev, 2, cols = c("PCGDP","LIFEEX1"))) expect_error(collapv(wlddev, 2, custom = ~ PCGDP)) expect_error(collapv(wlddev, 2, custom = list(fmean, fmode))) expect_error(collapv(wlddev, 2, custom = list(fmean = 9:14, fmode = 4:6))) expect_error(collapv(wlddev, 2, custom = list(fmean = 9:14, 4:6))) expect_error(collapv(wlddev, 2, custom = list(fmean = 9:14, fmode2 = 4:6))) expect_error(collapv(wlddev, 2, custom = list(fmean = 9:13, fmode = c("GINI","bla")))) expect_error(collapv(wlddev, 2, custom = list(fmean = 9:13, fmode = c("GINI","PCGDP2")))) }) options(warn = 1) collapse/tests/testthat/test-qsu.R0000644000176200001440000002417214172367040017000 0ustar liggesuserscontext("qsu") # rm(list = ls()) bmean <- base::mean bsd <- stats::sd bsum <- base::sum bstats <- function(x) { if(!is.numeric(x)) return(c(N = bsum(!is.na(x)), Mean = NA_real_, SD = NA_real_, Min = NA_real_, Max = NA_real_)) c(N = bsum(!is.na(x)), Mean = bmean(x, na.rm = TRUE), SD = bsd(x, na.rm = TRUE), `names<-`(range(x, na.rm = TRUE), c("Min", "Max"))) } base_qsu <- function(x, g = NULL) { if(is.atomic(x) && !is.matrix(x)) return(`oldClass<-`(bstats(x), c("qsu", "table"))) if(is.null(g)) { r <- t(dapply(x, bstats, return = "matrix")) return(`oldClass<-`(r, c("qsu", "matrix", "table"))) } r <- simplify2array(BY(x, g, bstats, return = "list", expand.wide = TRUE)) return(`oldClass<-`(r, c("qsu", "array", "table"))) } wldNA <- na_insert(wlddev) xNA <- na_insert(rnorm(100)) ones <- rep(1, fnrow(wlddev)) for(i in 1:2) { if(i == 1L) qsu <- function(x, ...) collapse::qsu(x, ..., stable.algo = FALSE) if(i == 2L) qsu <- collapse::qsu test_that("qsu works properly for simple cases (including unit groups and weights)", { expect_equal(qsu(1:10), base_qsu(1:10)) expect_equal(qsu(10:1), base_qsu(10:1)) expect_equal(qsu(xNA), base_qsu(xNA)) expect_equal(qsu(wlddev), base_qsu(wlddev)) expect_equal(qsu(wldNA), base_qsu(wldNA)) expect_equal(qsu(GGDC10S), base_qsu(GGDC10S)) expect_equal(qsu(1:10, w = rep(1, 10)), base_qsu(1:10)) expect_equal(qsu(10:1, w = rep(1, 10)), base_qsu(10:1)) expect_equal(qsu(xNA, w = rep(1, 100)), base_qsu(xNA)) expect_equal(qsu(wlddev, w = ones), base_qsu(wlddev)) expect_equal(qsu(wldNA, w = ones), base_qsu(wldNA)) expect_equal(qsu(GGDC10S, w = rep(1, fnrow(GGDC10S))), base_qsu(GGDC10S)) expect_equal(unattrib(qsu(1:10, g = rep(1, 10))), unattrib(base_qsu(1:10))) expect_equal(unattrib(qsu(10:1, g = rep(1, 10))), unattrib(base_qsu(10:1))) expect_equal(unattrib(qsu(xNA, g = rep(1, 100))), unattrib(base_qsu(xNA))) expect_equal(unattrib(qsu(wlddev, by = ones)), unattrib(t(base_qsu(wlddev)))) # This should be an array... or oriented the other way around... expect_equal(unattrib(qsu(1:10, g = rep(1, 10), w = rep(1, 10))), unattrib(base_qsu(1:10))) expect_equal(unattrib(qsu(10:1, g = rep(1, 10), w = rep(1, 10))), unattrib(base_qsu(10:1))) expect_equal(unattrib(qsu(xNA, g = rep(1, 100), w = rep(1, 100))), unattrib(base_qsu(xNA))) expect_equal(qsu(wldNA, w = ones), base_qsu(wldNA)) expect_equal(qsu(GGDC10S, w = rep(1, fnrow(GGDC10S))), base_qsu(GGDC10S)) expect_equal(t(unclass(qsu(wldNA, w = ones, by = ones))), unclass(base_qsu(wldNA))) expect_equal(t(unclass(qsu(GGDC10S, w = rep(1, fnrow(GGDC10S)), by = rep(1, fnrow(GGDC10S))))), unclass(base_qsu(GGDC10S))) }) } rm(qsu) test_that("qsu works properly for simple cases with higher-order statistics (including unit groups and weights)", { expect_equal(qsu(1:10, higher = TRUE)[1:5], base_qsu(1:10)) expect_equal(qsu(10:1, higher = TRUE)[1:5], base_qsu(10:1)) expect_equal(qsu(xNA, higher = TRUE)[1:5], base_qsu(xNA)) expect_equal(qsu(wlddev, higher = TRUE)[,1:5], base_qsu(wlddev)) expect_equal(qsu(wldNA, higher = TRUE)[,1:5], base_qsu(wldNA)) expect_equal(qsu(GGDC10S, higher = TRUE)[,1:5], base_qsu(GGDC10S)) expect_equal(qsu(1:10, w = rep(1, 10), higher = TRUE)[1:5], base_qsu(1:10)) expect_equal(qsu(10:1, w = rep(1, 10), higher = TRUE)[1:5], base_qsu(10:1)) expect_equal(qsu(xNA, w = rep(1, 100), higher = TRUE)[1:5], base_qsu(xNA)) expect_equal(qsu(wlddev, w = ones, higher = TRUE)[,1:5], base_qsu(wlddev)) expect_equal(qsu(wldNA, w = ones, higher = TRUE)[,1:5], base_qsu(wldNA)) expect_equal(qsu(GGDC10S, w = rep(1, fnrow(GGDC10S)), higher = TRUE)[,1:5], base_qsu(GGDC10S)) expect_equal(unattrib(qsu(1:10, g = rep(1, 10), higher = TRUE)[1:5]), unattrib(base_qsu(1:10))) expect_equal(unattrib(qsu(10:1, g = rep(1, 10), higher = TRUE)[1:5]), unattrib(base_qsu(10:1))) expect_equal(unattrib(qsu(xNA, g = rep(1, 100), higher = TRUE)[1:5]), unattrib(base_qsu(xNA))) expect_equal(unattrib(qsu(wlddev, by = ones, higher = TRUE)[1:5, ]), unattrib(t(base_qsu(wlddev)))) # This should be an array... or oriented the other way around... expect_equal(unattrib(qsu(1:10, g = rep(1, 10), w = rep(1, 10), higher = TRUE)[1:5]), unattrib(base_qsu(1:10))) expect_equal(unattrib(qsu(10:1, g = rep(1, 10), w = rep(1, 10), higher = TRUE)[1:5]), unattrib(base_qsu(10:1))) expect_equal(unattrib(qsu(xNA, g = rep(1, 100), w = rep(1, 100), higher = TRUE)[1:5]), unattrib(base_qsu(xNA))) expect_equal(qsu(wldNA, w = ones, higher = TRUE)[,1:5], base_qsu(wldNA)) expect_equal(qsu(GGDC10S, w = rep(1, fnrow(GGDC10S)), higher = TRUE)[,1:5], base_qsu(GGDC10S)) expect_equal(t(unclass(qsu(wldNA, w = ones, by = ones, higher = TRUE)[1:5,])), unclass(base_qsu(wldNA))) expect_equal(t(unclass(qsu(GGDC10S, w = rep(1, fnrow(GGDC10S)), by = rep(1, fnrow(GGDC10S)), higher = TRUE)))[,1:5], unclass(base_qsu(GGDC10S))) }) g <- GRP(wlddev, ~ income) p <- GRP(wlddev, ~ iso3c) for(i in 1:2) { if(i == 1L) qsu <- function(x, ...) collapse::qsu(x, ..., stable.algo = FALSE) if(i == 2L) qsu <- collapse::qsu test_that("qsu works properly for grouped and panel data computations", { # Grouped Statistics expect_equal(qsu(wldNA, g), base_qsu(wldNA, g)) expect_equal(qsu(GGDC10S, GGDC10S$Variable), base_qsu(GGDC10S, GGDC10S$Variable)) # Grouped and Weighted Statistics expect_equal(qsu(wldNA, g, w = ones), base_qsu(wldNA, g)) expect_equal(qsu(GGDC10S, GGDC10S$Variable, w = rep(1, fnrow(GGDC10S))), base_qsu(GGDC10S, GGDC10S$Variable)) # Panel Data Statistics ps <- qsu(wldNA, pid = p, cols = is.numeric) expect_equal(unattrib(t(ps["Overall",,])), unattrib(base_qsu(nv(wldNA)))) expect_equal(unattrib(t(ps["Between",,])), unattrib(base_qsu(fmean(nv(wldNA), p)))) expect_equal(unattrib(t(ps["Within", -1,])), unattrib(base_qsu(fwithin(nv(wldNA), p, mean = "overall.mean"))[, -1])) # Weighted Panel Data Statistics ps <- qsu(wldNA, pid = p, w = ones, cols = is.numeric) expect_equal(unattrib(t(ps["Overall",,])), unattrib(base_qsu(nv(wldNA)))) expect_equal(unattrib(t(ps["Between",-1,])), unattrib(base_qsu(fbetween(nv(wldNA), p))[,-1])) expect_equal(unattrib(t(ps["Within", -1,])), unattrib(base_qsu(fwithin(nv(wldNA), p, mean = "overall.mean"))[, -1])) # Grouped Panel Data Statistics ps <- qsu(wldNA, by = g, pid = p, cols = is.numeric) expect_equal(unattrib(ps[,,"Overall",]), unattrib(base_qsu(nv(wldNA), g))) expect_equal(unattrib(ps[,-1,"Between",]), unattrib(base_qsu(fbetween(nv(wldNA), p), g)[,-1,])) expect_equal(unattrib(ps[,-1,"Within",]), unattrib(base_qsu(fwithin(nv(wldNA), p, mean = "overall.mean"), g)[,-1,])) # Grouped and Weighted Panel Data Statistics ps <- qsu(wldNA, by = g, pid = p, w = ones, cols = is.numeric) expect_equal(unattrib(ps[,,"Overall",]), unattrib(base_qsu(nv(wldNA), g))) expect_equal(unattrib(ps[,-1,"Between",]), unattrib(base_qsu(fbetween(nv(wldNA), p), g)[,-1,])) expect_equal(unattrib(ps[,-1,"Within",]), unattrib(base_qsu(fwithin(nv(wldNA), p, mean = "overall.mean"), g)[,-1,])) }) } rm(qsu) test_that("qsu works properly for grouped and panel data computations with higher-order statistics", { # Grouped Statistics expect_equal(qsu(wldNA, g, higher = TRUE)[,1:5,], base_qsu(wldNA, g)) expect_equal(qsu(GGDC10S, GGDC10S$Variable, higher = TRUE)[,1:5,], base_qsu(GGDC10S, GGDC10S$Variable)) # Grouped and Weighted Statistics expect_equal(qsu(wldNA, g, w = ones, higher = TRUE)[,1:5,], base_qsu(wldNA, g)) expect_equal(qsu(GGDC10S, GGDC10S$Variable, w = rep(1, fnrow(GGDC10S)), higher = TRUE)[,1:5,], base_qsu(GGDC10S, GGDC10S$Variable)) # Panel Data Statistics ps <- qsu(wldNA, pid = p, cols = is.numeric, higher = TRUE)[,1:5,] expect_equal(unattrib(t(ps["Overall",,])), unattrib(base_qsu(nv(wldNA)))) expect_equal(unattrib(t(ps["Between",,])), unattrib(base_qsu(fmean(nv(wldNA), p)))) expect_equal(unattrib(t(ps["Within", -1,])), unattrib(base_qsu(fwithin(nv(wldNA), p, mean = "overall.mean"))[, -1])) # Weighted Panel Data Statistics ps <- qsu(wldNA, pid = p, w = ones, cols = is.numeric, higher = TRUE)[,1:5,] expect_equal(unattrib(t(ps["Overall",,])), unattrib(base_qsu(nv(wldNA)))) # TODO: Figure out why this test fails !!!!!! # expect_equal(unattrib(t(ps["Between",-1,])), unattrib(base_qsu(fbetween(nv(wldNA), p))[,-1])) expect_equal(unattrib(t(ps["Within", -1,])), unattrib(base_qsu(fwithin(nv(wldNA), p, mean = "overall.mean"))[, -1])) # Grouped Panel Data Statistics ps <- qsu(wldNA, by = g, pid = p, cols = is.numeric, higher = TRUE)[,1:5,,] expect_equal(unattrib(ps[,,"Overall",]), unattrib(base_qsu(nv(wldNA), g))) expect_equal(unattrib(ps[,-1,"Between",]), unattrib(base_qsu(fbetween(nv(wldNA), p), g)[,-1,])) expect_equal(unattrib(ps[,-1,"Within",]), unattrib(base_qsu(fwithin(nv(wldNA), p, mean = "overall.mean"), g)[,-1,])) # Grouped and Weighted Panel Data Statistics ps <- qsu(wldNA, by = g, pid = p, w = ones, cols = is.numeric, higher = TRUE)[,1:5,,] expect_equal(unattrib(ps[,,"Overall",]), unattrib(base_qsu(nv(wldNA), g))) expect_equal(unattrib(ps[,-1,"Between",]), unattrib(base_qsu(fbetween(nv(wldNA), p), g)[,-1,])) expect_equal(unattrib(ps[,-1,"Within",]), unattrib(base_qsu(fwithin(nv(wldNA), p, mean = "overall.mean"), g)[,-1,])) }) # Make more tests!! See also collapse general TODO ! test_that("qsu gives errors for wrong input", { expect_error(qsu(wlddev$year, 2:4)) expect_error(qsu(wlddev$year, pid = 2:4)) expect_error(qsu(wlddev, 2:4)) expect_error(qsu(wlddev, pid = 2:4)) expect_error(qsu(wlddev$year, letters)) expect_error(qsu(wlddev$year, pid = letters)) expect_error(qsu(wlddev, letters)) expect_error(qsu(wlddev, pid = letters)) expect_error(qsu(wlddev, ~ iso3c + bla)) expect_error(qsu(wlddev, pid = ~ iso3c + bla)) expect_visible(qsu(wlddev, PCGDP ~ region + income)) expect_visible(qsu(wlddev, pid = PCGDP ~ region + income)) expect_equal(qsu(wlddev, PCGDP ~ region + income, ~ iso3c), qsu(wlddev, ~ region + income, pid = PCGDP ~ iso3c)) expect_error(qsu(wlddev, cols = 9:14)) expect_error(qsu(wlddev, cols = c("PCGDP","bla"))) }) collapse/tests/testthat/test-dapply.R0000644000176200001440000000704513735052317017463 0ustar liggesuserscontext("dapply") # rm(list = ls()) test_that("All common uses of dapply can be performed, as per examples", { # data.frame expect_equal(dapply(mtcars, force), mtcars) expect_equal(dapply(`attr<-`(mtcars, "bla", 1), force), `attr<-`(mtcars, "bla", 1)) expect_equal(dapply(`attr<-`(mtcars, "bla", 1), force, MARGIN = 1), `attr<-`(mtcars, "bla", 1)) expect_visible(dapply(mtcars, log)) expect_true(is.matrix(dapply(mtcars, log, return = "matrix"))) # matrix m <- as.matrix(mtcars) expect_equal(dapply(m, force), m) expect_equal(dapply(EuStockMarkets, force), EuStockMarkets) expect_equal(dapply(EuStockMarkets, force, MARGIN = 1), EuStockMarkets) expect_visible(dapply(m, log)) expect_true(is.data.frame(dapply(m, log, return = "data.frame"))) # matrix <> data.frame conversions expect_equal(dapply(mtcars, log, return = "matrix"), dapply(m, log)) expect_equal(dapply(mtcars, log, return = "matrix", MARGIN = 1), dapply(m, log, MARGIN = 1)) expect_equal(dapply(m, log, return = "data.frame"), dapply(mtcars, log)) expect_equal(dapply(m, log, return = "data.frame", MARGIN = 1), dapply(mtcars, log, MARGIN = 1)) expect_equal(dapply(mtcars, quantile, return = "matrix"), dapply(m, quantile)) expect_equal(dapply(mtcars, quantile, return = "matrix", MARGIN = 1), dapply(m, quantile, MARGIN = 1)) expect_equal(dapply(m, quantile, return = "data.frame"), dapply(mtcars, quantile)) expect_equal(dapply(m, quantile, return = "data.frame", MARGIN = 1), dapply(mtcars, quantile, MARGIN = 1)) # scalar function gives atomic vector expect_true(is.atomic(dapply(mtcars, sum))) expect_equal(dapply(m, sum), dapply(mtcars, sum)) expect_true(is.atomic(dapply(mtcars, sum, MARGIN = 1))) expect_equal(dapply(m, sum, MARGIN = 1), dapply(mtcars, sum, MARGIN = 1)) # drop = FALSE retains object structure expect_true(is.data.frame(dapply(mtcars, sum, drop = FALSE))) expect_true(is.data.frame(dapply(mtcars, sum, MARGIN = 1, drop = FALSE))) expect_true(is.matrix(dapply(m, sum, drop = FALSE))) expect_true(is.matrix(dapply(m, sum, MARGIN = 1, drop = FALSE))) # matrix <> data.frame conversions without drop dimensions expect_equal(dapply(m, sum, drop = FALSE), dapply(mtcars, sum, return = "matrix", drop = FALSE)) expect_equal(dapply(mtcars, sum, drop = FALSE), dapply(m, sum, return = "data.frame", drop = FALSE)) # ... but if function is vector value, drop = FALSE does nothing expect_true(is.data.frame(dapply(mtcars, log, drop = FALSE))) expect_true(is.data.frame(dapply(mtcars, log, MARGIN = 1, drop = FALSE))) expect_true(is.data.frame(dapply(mtcars, quantile, drop = FALSE))) expect_true(is.data.frame(dapply(mtcars, quantile, MARGIN = 1, drop = FALSE))) expect_true(is.matrix(dapply(m, log, drop = FALSE))) expect_true(is.matrix(dapply(m, log, MARGIN = 1, drop = FALSE))) expect_true(is.matrix(dapply(m, quantile, drop = FALSE))) expect_true(is.matrix(dapply(m, quantile, MARGIN = 1, drop = FALSE))) # passing additional arguments works: dapply(mtcars, weighted.mean, mtcars$hp, na.rm = TRUE) dapply(m, weighted.mean, mtcars$hp, na.rm = TRUE) }) test_that("dapply produces errors for wrong input", { expect_error(dapply("a", sum)) expect_error(dapply(~ y, sum)) expect_error(dapply(iris3, sum)) expect_error(dapply(mtcars, sum2)) expect_error(dapply(mtcars, sum, MARGIN = 3)) expect_error(dapply(mtcars, sum, MARGIN = 1:2)) expect_error(dapply(mtcars, sum, MARGIN = "a")) expect_error(dapply(mtcars, sum, return = "bla", drop = FALSE)) }) collapse/tests/testthat/test-splitting.R0000644000176200001440000000445714163402325020205 0ustar liggesuserscontext("gsplit and rsplit") wld2 <- wlddev oldClass(wld2) <- NULL vlabels(wld2) <- NULL f <- wld2$iso3c ind <- 1:1000 fss <- f[ind] fl <- wld2[c("region", "income")] flss <- ss(fl, ind) test_that("gsplit / rsplit work like split", { for(i in seq_col(wld2)) { expect_equal(gsplit(wld2[[i]], f, TRUE), split(wld2[[i]], f)) expect_equal(gsplit(wld2[[i]], f, FALSE), `names<-`(split(wld2[[i]], f), NULL)) expect_equal(gsplit(wld2[[i]][ind], fss, TRUE), split(wld2[[i]][ind], fss)) expect_equal(rsplit(wld2[[i]][ind], fss), split(wld2[[i]][ind], fss, drop = TRUE)) # factor list expect_true(all_obj_equal(gsplit(wld2[[i]], fl, TRUE), rsplit(wld2[[i]], fl, flatten = TRUE), unlist(rsplit(wld2[[i]], fl), recursive = FALSE), split(wld2[[i]], fl, drop = TRUE, lex.order = TRUE))) expect_true(all_obj_equal(gsplit(wld2[[i]][ind], flss, TRUE), rsplit(wld2[[i]][ind], flss, flatten = TRUE), unlist(rsplit(wld2[[i]][ind], flss), recursive = FALSE), split(wld2[[i]][ind], flss, drop = TRUE, lex.order = TRUE))) } }) test_that("rsplit data frame method works as intended", { expect_equal(rsplit(mtcars, mtcars$cyl), split(mtcars, mtcars$cyl)) expect_equal(rsplit(mtcars, mpg ~ cyl), split(mtcars$mpg, mtcars$cyl)) expect_equal(rsplit(mtcars, mpg ~ cyl, simplify = FALSE), split(mtcars["mpg"], mtcars$cyl)) expect_true(all_obj_equal(rsplit(mtcars, mtcars[.c(cyl, vs, am)], flatten = TRUE), rsplit(mtcars, ~ cyl + vs + am, flatten = TRUE, keep.by = TRUE), unlist(unlist(rsplit(mtcars, mtcars[.c(cyl, vs, am)]), FALSE), FALSE), unlist(unlist(rsplit(mtcars, ~ cyl + vs + am, keep.by = TRUE), FALSE), FALSE), split(mtcars, mtcars[.c(cyl, vs, am)], drop = TRUE, lex.order = TRUE))) expect_true(all_obj_equal(rsplit(mtcars, ~ cyl + vs + am, flatten = TRUE), unlist(unlist(rsplit(mtcars, ~ cyl + vs + am), FALSE), FALSE), split(mtcars[names(mtcars) %!in% .c(cyl, vs, am)], mtcars[.c(cyl, vs, am)], drop = TRUE, lex.order = TRUE))) }) collapse/tests/testthat/test-select-replace-vars.R0000644000176200001440000001767614066131655022047 0ustar liggesuserscontext("select, replace or add vars") # rm(list = ls()) test_that("selecting vars works well", { expect_identical(get_vars(wlddev, 4:8), wlddev[4:8]) expect_identical(get_vars(wlddev, -(4:8)), wlddev[-(4:8)]) expect_identical(get_vars(wlddev, sapply(wlddev, is.numeric)), wlddev[sapply(wlddev, is.numeric)]) expect_identical(get_vars(wlddev, c("iso3c","PCGDP","ODA")), wlddev[c("iso3c","PCGDP","ODA")]) expect_identical(get_vars(wlddev, "D", regex = TRUE), wlddev[c("OECD","PCGDP","ODA")]) expect_identical(get_vars(wlddev, c("D","L"), regex = TRUE), wlddev[c("OECD","PCGDP","LIFEEX","ODA")]) expect_identical(get_vars(wlddev, is.factor), wlddev[sapply(wlddev, is.factor)]) expect_identical(num_vars(wlddev), wlddev[sapply(wlddev, is.numeric)]) expect_identical(cat_vars(wlddev), wlddev[sapply(wlddev, is_categorical)]) expect_identical(char_vars(wlddev), wlddev[sapply(wlddev, is.character)]) expect_identical(fact_vars(wlddev), wlddev[sapply(wlddev, is.factor)]) expect_identical(date_vars(wlddev), wlddev[sapply(wlddev, is_date)]) }) test_that("replacing vars works well", { wlddevold <- wlddev get_vars(wlddev, 4:8) <- get_vars(wlddev, 4:8) expect_identical(wlddevold, wlddev) wlddevold <- wlddev fselect(wlddev, PCGDP:GINI) <- fselect(wlddev, PCGDP:GINI) expect_identical(wlddevold, wlddev) wlddevold <- wlddev get_vars(wlddev, -(4:8)) <- get_vars(wlddev, -(4:8)) expect_identical(wlddevold, wlddev) wlddevold <- wlddev fselect(wlddev, -(PCGDP:GINI)) <- fselect(wlddev, -(PCGDP:GINI)) expect_identical(wlddevold, wlddev) wlddevold <- wlddev get_vars(wlddev, -(4:8)) <- as.list(get_vars(wlddev, -(4:8))) expect_identical(wlddevold, wlddev) wlddevold <- wlddev get_vars(wlddev, c("iso3c","PCGDP","ODA")) <- get_vars(wlddev, c("iso3c","PCGDP","ODA")) expect_identical(wlddevold, wlddev) wlddevold <- wlddev get_vars(wlddev, "D", regex = TRUE) <- get_vars(wlddev, "D", regex = TRUE) expect_identical(wlddevold, wlddev) wlddevold <- wlddev get_vars(wlddev, c("D","L"), regex = TRUE) <- get_vars(wlddev, c("D","L"), regex = TRUE) expect_identical(wlddevold, wlddev) wlddevold <- wlddev get_vars(wlddev, sapply(wlddev, is.numeric)) <- get_vars(wlddev, sapply(wlddev, is.numeric)) expect_identical(wlddevold, wlddev) wlddevold <- wlddev get_vars(wlddev, is.factor) <- get_vars(wlddev, is.factor) expect_identical(wlddevold, wlddev) wlddevold <- wlddev num_vars(wlddev) <- num_vars(wlddev) expect_identical(wlddevold, wlddev) wlddevold <- wlddev cat_vars(wlddev) <- cat_vars(wlddev) expect_identical(wlddevold, wlddev) wlddevold <- wlddev char_vars(wlddev) <- char_vars(wlddev) expect_identical(wlddevold, wlddev) wlddevold <- wlddev fact_vars(wlddev) <- fact_vars(wlddev) expect_identical(wlddevold, wlddev) wlddevold <- wlddev logi_vars(wlddev) <- logi_vars(wlddev) expect_identical(wlddevold, wlddev) wlddevold <- wlddev date_vars(wlddev) <- date_vars(wlddev) expect_identical(wlddevold, wlddev) }) test_that("adding vars works well", { wlddev1 <- wlddev2 <- wlddev temp <- STD(get_vars(wlddev, 9:12)) add_vars(wlddev1) <- temp wlddev2[names(temp)] <- temp expect_identical(wlddev1, wlddev2) wlddev1 <- wlddev temp <- STD(get_vars(wlddev, 9:12)) add_vars(wlddev1, "front") <- temp expect_identical(wlddev1, add_vars(temp, wlddev)) wlddev1 <- wlddev temp <- STD(get_vars(wlddev, 9:13)) add_vars(wlddev1, c(10,12,14,16,18)) <- temp expect_true(all_identical(wlddev1, add_vars(wlddev, temp, pos = c(10,12,14,16,18)), add_vars(gv(wlddev, 1:9), gv(temp, 1), gv(wlddev, 10), gv(temp, 2), gv(wlddev, 11), gv(temp, 3), gv(wlddev, 12), gv(temp, 4), gv(wlddev, 13), gv(temp, 5)))) }) test_that("replacing with or adding atomic elements works well", { wlddev1 <- wlddev2 <- wlddev get_vars(wlddev1, 9) <- wlddev$PCGDP expect_identical(wlddev1, wlddev) get_vars(wlddev1, 9) <- qM(wlddev[9:12]) wlddev2[9] <- qM(wlddev[9:12]) expect_identical(wlddev1, wlddev2) wlddev1 <- wlddev2 <- wlddev add_vars(wlddev1) <- wlddev$PCGDP expect_identical(wlddev1, cbind(wlddev2, wlddev["PCGDP"])) wlddev1 <- wlddev2 <- wlddev add_vars(wlddev1) <- qM(wlddev[9:12]) wlddev2["wlddev[9:12]"] <- qM(wlddev[9:12]) # formerly wlddev2["qM(wlddev[9:12])"], but no longer using deparse.. expect_identical(wlddev1, wlddev2) wlddev1 <- wlddev2 <- wlddev add_vars(wlddev1, "front") <- wlddev$PCGDP expect_identical(wlddev1, add_vars(wlddev, wlddev$PCGDP, pos = 1)) wlddev1 <- wlddev2 <- wlddev add_vars(wlddev1, "front") <- qM(wlddev[9:12]) expect_identical(wlddev1, add_vars(wlddev, qM(wlddev[9:12]), pos = 1)) }) test_that("empty selections work well", { expect_equal(cat_vars(mtcars), mtcars[0L]) expect_equal(char_vars(mtcars), mtcars[0L]) expect_equal(fact_vars(mtcars), mtcars[0L]) expect_equal(logi_vars(mtcars), mtcars[0L]) expect_equal(get_vars(mtcars, is.character), mtcars[0L]) expect_equal(get_vars(mtcars, 0L), mtcars[0L]) expect_error(get_vars(mtcars, NULL)) }) test_that("select vars errors for wrong input", { expect_error(get_vars(wlddev, 14)) expect_error(get_vars(wlddev, 1:14)) expect_error(get_vars(wlddev, -14)) expect_error(get_vars(wlddev, c("PCGDP","ODA3"))) # expect_warning(get_vars(wlddev, "bla", regex = TRUE)) # Better give error expect_error(get_vars(wlddev, c(sapply(wlddev, is.numeric), TRUE))) expect_error(get_vars(wlddev, sapply(wlddev, is.numeric)[-1])) }) test_that("replace vars errors for wrong input", { expect_error(get_vars(wlddev, 14) <- wlddev[12]) expect_error(get_vars(wlddev, "ODA3") <- wlddev[12]) expect_error(get_vars(wlddev, "bla", regex = TRUE) <- wlddev[12]) expect_error(get_vars(wlddev, -14) <- wlddev[12]) expect_error(get_vars(wlddev, 11:12) <- wlddev[12]) expect_error(get_vars(wlddev, 9:12) <- wlddev[8:12]) expect_invisible(get_vars(wlddev, 12) <- wlddev$ODA) expect_error(get_vars(wlddev, 12) <- wlddev$ODA[-1]) expect_error(get_vars(wlddev, 12) <- qM(wlddev[9:12])[-1, ]) expect_error(get_vars(wlddev, c(sapply(wlddev, is.numeric), TRUE)) <- wlddev) expect_error(get_vars(wlddev, sapply(wlddev, is.numeric)[-1]) <- wlddev) }) test_that("add_vars errors for wrong input", { expect_error(add_vars(wlddev, 15) <- wlddev[12]) expect_error(add_vars(wlddev, "ODA3") <- wlddev[12]) expect_error(add_vars(wlddev) <- qM(wlddev[9:12])[-1, ]) expect_error(add_vars(wlddev, "front") <- qM(wlddev[9:12])[-1, ]) expect_error(add_vars(wlddev, 8) <- qM(wlddev[9:12])[-1, ]) expect_error(add_vars(wlddev) <- wlddev[-1, 9:12]) expect_error(add_vars(wlddev, "front") <- wlddev[-1, 9:12]) expect_error(add_vars(wlddev, 8) <- wlddev[-1, 9:12]) expect_error(add_vars(wlddev, 12) <- wlddev[9:12]) expect_error(add_vars(wlddev, 9:12) <- wlddev[9:10]) }) test_that("fselect errors for wrong input", { expect_visible(fselect(mtcars, 1)) expect_error(fselect(mtcars, "bla")) expect_visible(fselect(mtcars, "mpg")) expect_error(fselect(mtcars, mpg:bla)) expect_error(fselect(mtcars, mpg > cyl)) expect_error(fselect(mtcars, ~mpg)) }) test_that("fselect works properly", { expect_equal(fselect(mtcars, mpg, 2), mtcars[1:2]) expect_equal(fselect(mtcars, mpg:vs), mtcars[1:8]) expect_equal(names(fselect(mtcars, bla = mpg, cyl:vs)), c("bla", names(mtcars)[2:8])) expect_invisible(fselect(wlddev, -PCGDP) <- fselect(wlddev, -PCGDP)) }) test_that("no problems with numeric values", { expect_equal(fselect(mtcars, 1), mtcars[1]) expect_equal(get_vars(mtcars, 1), mtcars[1]) expect_equal(gv(mtcars, 1), mtcars[1]) expect_invisible(fselect(mtcars, 1) <- mtcars[1]) expect_invisible(get_vars(mtcars, 1) <- mtcars[1]) expect_invisible(gv(mtcars, 1) <- mtcars[1]) expect_invisible(av(mtcars, pos = 1) <- mtcars[1]) }) collapse/tests/testthat/test-flm-fFtest.R0000644000176200001440000001041514170063326020170 0ustar liggesuserscontext("flm and fFtest") y <- mtcars$mpg x <- qM(mtcars[c("cyl","vs","am","carb","hp")]) w <- mtcars$wt lmr <- lm(mpg ~ cyl + vs + am + carb + hp, mtcars) lmw <- lm(mpg ~ cyl + vs + am + carb + hp, weights = wt, mtcars) NCRAN <- identical(Sys.getenv("NCRAN"), "TRUE") test_that("flm works as intended", { if(NCRAN) for(i in 1:6) expect_equal(drop(flm(y, x, add.icpt = TRUE, method = i)), coef(lmr)) if(NCRAN) for(i in 1:6) expect_equal(drop(flm(y, x, w, add.icpt = TRUE, method = i)), coef(lmw)) expect_equal(flm(y, x, method = 1L, return.raw = TRUE), .lm.fit(x, y)) expect_equal(flm(y, x, method = 2L, return.raw = TRUE), solve(crossprod(x), crossprod(x, y))) expect_equal(flm(y, x, method = 3L, return.raw = TRUE), qr.coef(qr(x), y)) expect_equal(flm(y, x, method = 5L, return.raw = TRUE), cinv(crossprod(x)) %*% crossprod(x, y)) if(NCRAN) { # This is to fool very silly checks on CRAN scanning the code of the tests afmlp <- eval(parse(text = paste0("RcppArmadillo", ":", ":", "fastLmPure"))) efmlp <- eval(parse(text = paste0("RcppEigen", ":", ":", "fastLmPure"))) expect_equal(flm(y, x, method = 4L, return.raw = TRUE), afmlp(x, y)) expect_equal(flm(y, x, method = 6L, return.raw = TRUE), efmlp(x, y, 3L)) } if(NCRAN) for(i in 1:6) expect_visible(flm(y, x, w, method = i, return.raw = TRUE)) ym <- cbind(y, y) for(i in c(1:3, 5L)) expect_visible(flm(ym, x, w, method = i)) expect_error(flm(y[-1L], x, w)) expect_error(flm(y, x, w[-1L])) expect_error(flm(y, x[-1L, ], w)) }) test_that("fFtest works as intended", { r <- fFtest(iris$Sepal.Length, gv(iris, -1L)) rlm <- summary(lm(Sepal.Length ~., iris)) expect_equal(unattrib(r)[1:4], unattrib(c(rlm$r.squared, rlm$fstatistic[c(2:3, 1L)]))) # Same with weights: w <- abs(rnorm(fnrow(iris))) r <- fFtest(iris$Sepal.Length, gv(iris, -1L), w = w) rlm <- summary(lm(Sepal.Length ~., weights = w, iris)) expect_equal(unattrib(r)[1:4], unattrib(c(rlm$r.squared, rlm$fstatistic[c(2:3, 1L)]))) # Repeat with missing values set.seed(101) iris <- na_insert(iris) r <- fFtest(iris$Sepal.Length, gv(iris, -1L)) rlm <- summary(lm(Sepal.Length ~., iris)) expect_equal(unattrib(r)[1:4], unattrib(c(rlm$r.squared, rlm$fstatistic[c(2:3, 1L)]))) # Same with weights: set.seed(101) w <- na_insert(w) r <- fFtest(iris$Sepal.Length, gv(iris, -1L), w = w) rlm <- summary(lm(Sepal.Length ~., weights = w, iris)) expect_equal(unattrib(r)[1:4], unattrib(c(rlm$r.squared, rlm$fstatistic[c(2:3, 1L)]))) rm(iris) if(NCRAN) { r <- fFtest(wlddev$PCGDP, qF(wlddev$year), wlddev[c("iso3c","LIFEEX")]) # Same test done using lm: data <- na_omit(get_vars(wlddev, c("iso3c","year","PCGDP","LIFEEX")), na.attr = TRUE) full <- lm(PCGDP ~ LIFEEX + iso3c + qF(year), data) rest <- lm(PCGDP ~ LIFEEX + iso3c, data) ranv <- anova(rest, full) expect_equal(unattrib(r[1L, 1:4]), unlist(summary(full)[c("r.squared", "fstatistic")], use.names = FALSE)[c(1L, 3:4, 2L)]) expect_equal(unattrib(r[2L, 1:4]), unlist(summary(rest)[c("r.squared", "fstatistic")], use.names = FALSE)[c(1L, 3:4, 2L)]) expect_equal(rev(unattrib(r[1:2, 3L])), ranv$Res.Df) expect_equal(r[3L, 2L], na_rm(ranv$Df)) expect_equal(r[3L, 4L], na_rm(ranv$F)) expect_equal(r[3L, 5L], na_rm(ranv$`Pr(>F)`)) # Same with weights: w <- abs(rnorm(fnrow(wlddev))) r <- fFtest(wlddev$PCGDP, qF(wlddev$year), wlddev[c("iso3c","LIFEEX")], w) full <- lm(PCGDP ~ LIFEEX + iso3c + qF(year), weights = w[-attr(data, "na.action")], data) rest <- lm(PCGDP ~ LIFEEX + iso3c, weights = w[-attr(data, "na.action")], data) ranv <- anova(rest, full) expect_equal(unattrib(r[1L, 1:4]), unlist(summary(full)[c("r.squared", "fstatistic")], use.names = FALSE)[c(1L, 3:4, 2L)]) expect_equal(unattrib(r[2L, 1:4]), unlist(summary(rest)[c("r.squared", "fstatistic")], use.names = FALSE)[c(1L, 3:4, 2L)]) expect_equal(rev(unattrib(r[1:2, 3L])), ranv$Res.Df) expect_equal(r[3L, 2L], na_rm(ranv$Df)) expect_equal(r[3L, 4L], na_rm(ranv$F)) expect_equal(r[3L, 5L], na_rm(ranv$`Pr(>F)`)) } }) collapse/tests/testthat/test-data.table.R0000644000176200001440000002775114170063266020176 0ustar liggesuserscontext("collapse and data.table integration") bmean <- base::mean # TODO: Check memory allocation, particularly where names<- and attr<- are used. # Also check attribute handling helpers with atomic and S4 objects !! options(warn = -1L) library(data.table) library(magrittr) mtcDT <- qDT(roworderv(mtcars)) irisDT <- qDT(ss(iris, 1:100)) n <- 5L # copy <- identity # assignInNamespace("cedta.override", c(data.table:::cedta.override, "collapse"), "data.table") assignInNamespace("cedta.override", "collapse", "data.table") options(warn = 1L) test_that("creating columns and printing works after passing a data.table through collapse functions", { expect_true(is.data.table(mtcDT)) expect_true(is.data.table(irisDT)) expect_output(print(mtcDT)) expect_identical(names(mtcDT), names(mtcars)) expect_silent(mtcDT[, col := 1]) expect_output(print(mtcDT)) expect_silent(mtcDT[, col := NULL]) expect_identical(names(mtcDT), names(mtcars)) expect_output(print(mtcDT)) expect_silent(irisDT[, col := 1]) expect_silent(irisDT[, col := NULL]) # Statistical functions give warning dt <- fscale(copy(mtcDT)) expect_warning(dt[, new := 1]) expect_output(print(dt)) dt <- fsum(copy(mtcDT), TRA = 1) expect_warning(dt[, new := 1]) expect_output(print(dt)) dt <- fsum(copy(mtcDT), drop = FALSE) expect_warning(dt[, new := 1]) expect_output(print(dt)) for(i in 1:n) { if(!identical(copy, identity)) mtcDT <- qDT(mtcDT) expect_silent(mtcDT[, col := 1]) expect_silent(mtcDT[, col := NULL]) expect_identical(names(mtcDT), names(mtcars)) expect_identical(length(mtcDT), length(mtcars)) } # Other functions should work: for(i in 1:n) { dt <- fgroup_by(mtcDT, cyl) expect_identical(names(dt), names(mtcars)) # print(ltl(dt)) expect_silent(dt[, new := 1]) expect_output(print(dt)) # print(ltl(dt)) } for(i in 1:n) { dt2 <- fgroup_vars(dt) expect_silent(dt2[, new := 1]) expect_output(print(dt2)) } for(i in 1:n) { dt <- fungroup(fgroup_by(mtcDT, c(2,8:9))) expect_identical(names(dt), names(mtcars)) expect_silent(dt[, new := 1]) expect_output(print(dt)) } for(i in 1:n) { dt <- funique(copy(mtcDT)) expect_identical(names(dt), names(mtcars)) expect_silent(dt[, new := 1]) expect_output(print(dt)) } for(i in 1:n) { dt <- funique(copy(mtcDT), cols = "cyl") expect_identical(names(dt), names(mtcars)) expect_silent(dt[, new := 1]) expect_output(print(dt)) } for(i in 1:n) { dt <- fselect(copy(mtcDT), -mpg, -hp) expect_silent(dt[, new := 1]) expect_output(print(dt)) } for(i in 1:n) { dt <- fselect(copy(mtcDT), col2 = disp, wt:carb) expect_silent(dt[, new := 1]) expect_output(print(dt)) fselect(dt, col2, new) <- NULL expect_silent(dt[, ncol := 1]) expect_output(print(dt)) } for(i in 1:n) { dt <- fsubset(copy(mtcDT), cyl == 4) expect_identical(names(dt), names(mtcars)) expect_silent(dt[, new := 1]) expect_output(print(dt)) } for(i in 1:n) { dt <- fsubset(copy(mtcDT), cyl == 4, bla = mpg, vs:am) expect_silent(dt[, new := 1]) expect_output(print(dt)) } for(i in 1:n) { dt <- copy(mtcDT) %>% smr(mean_mpg = fmean(mpg)) expect_silent(dt[, new := 1]) expect_output(print(dt)) } for(i in 1:n) { dt <- copy(mtcDT) %>% smr(mean_mpg = bmean(mpg)) expect_silent(dt[, new := 1]) expect_output(print(dt)) } for(i in 1:n) { dt <- copy(mtcDT) %>% gby(cyl) %>% smr(mean_mpg = fmean(mpg)) expect_silent(dt[, new := 1]) expect_output(print(dt)) } for(i in 1:n) { dt <- copy(mtcDT) %>% gby(cyl) %>% smr(mean_mpg = bmean(mpg)) expect_silent(dt[, new := 1]) expect_output(print(dt)) } for(i in 1:n) { dt <- ftransform(copy(mtcDT), bla = 1) expect_silent(dt[, new := 1]) expect_output(print(dt)) } for(i in 1:n) { settransform(dt, bla2 = 1) expect_silent(dt[, new2 := 1]) expect_output(print(dt)) } for(i in 1:n) { ftransform(dt) <- list(sds = mtcDT$qsec) expect_silent(dt[, new3 := 1]) expect_output(print(dt)) } for(i in 1:n) { dt <- fcompute(copy(mtcDT), bla = mpg + cyl, df = 1, keep = 7:10) expect_silent(dt[, new := 1]) expect_output(print(dt)) } for(i in 1:n) { dt <- roworderv(copy(mtcDT)) expect_identical(names(dt), names(mtcars)) expect_silent(dt[, new := 1]) expect_output(print(dt)) } for(i in 1:n) { dt <- roworder(copy(mtcDT), cyl, -vs) expect_identical(names(dt), names(mtcars)) expect_silent(dt[, new := 1]) expect_output(print(dt)) } for(i in 1:n) { dt <- roworderv(copy(mtcDT), cols = 1:2) expect_identical(names(dt), names(mtcars)) expect_silent(dt[, new := 1]) expect_output(print(dt)) } for(i in 1:n) { dt <- colorderv(copy(mtcDT)) expect_silent(dt[, new := 1]) expect_output(print(dt)) } for(i in 1:n) { dt <- colorder(copy(mtcDT), vs, cyl, am) expect_silent(dt[, new := 1]) expect_output(print(dt)) } for(i in 1:n) { dt <- frename(copy(mtcDT), carb = bla, mpg = x) expect_silent(dt[, new := 1]) expect_output(print(dt)) } for(i in 1:n) { dt <- frename(copy(mtcDT), toupper) expect_silent(dt[, new := 1]) expect_output(print(dt)) setrename(dt, MPG = ABC, new = NEW) expect_silent(dt[, new := 1]) expect_output(print(dt)) } for(i in 1:n) { dt <- get_vars(copy(irisDT), 1:3) expect_silent(dt[, new := 1]) expect_output(print(dt)) get_vars(dt, 1) <- irisDT$Species expect_silent(dt[, new2 := 1]) expect_output(print(dt)) } for(i in 1:n) { get_vars(dt, 1) <- NULL expect_silent(dt[, new3 := 1]) expect_output(print(dt)) } for(i in 1:n) { dt <- get_vars(irisDT, 1:3) %>% add_vars(gv(irisDT, 4)) expect_silent(dt[, new := 1]) expect_output(print(dt)) } for(i in 1:n) { add_vars(dt) <- list(Sp = irisDT$Species) expect_silent(dt[, new2 := 1]) expect_output(print(dt)) } wldDT <- qDT(wlddev) for(i in .c(num_vars, nv, cat_vars, char_vars, fact_vars, logi_vars, date_vars)) { # print(i) # Iris data FUN <- match.fun(i) dt <- FUN(irisDT) expect_identical(names(dt), FUN(iris, "names")) expect_silent(dt[, new := 1]) expect_output(print(dt)) rm(dt) dt <- irisDT eval(substitute(FUN(dt) <- NULL, list(FUN = as.name(i)))) expect_silent(dt[, new := 1]) expect_output(print(dt)) rm(dt) # wlddev data dt <- FUN(wldDT) expect_identical(names(dt), FUN(wlddev, "names")) expect_silent(dt[, new := 1]) expect_output(print(dt)) rm(dt) dt <- wldDT eval(substitute(FUN(dt) <- NULL, list(FUN = as.name(i)))) expect_silent(dt[, new := 1]) expect_output(print(dt)) rm(dt) } for(i in 1:n) { dt <- relabel(copy(wldDT), toupper) expect_silent(dt[, new := 1]) expect_output(print(dt)) setrelabel(dt, PCGDP = "GRP per cap", LIFEEX = "LE") expect_silent(dt[, new := 1]) expect_output(print(dt)) } for(i in 1:n) { dt <- qDT(qTBL(qDF(qDT(GGDC10S)))) expect_identical(names(dt), names(GGDC10S)) expect_silent(dt[, new := 1]) expect_output(print(dt)) } for(i in 1:n) { dt <- fdroplevels(copy(wldDT)) expect_identical(names(dt), names(wlddev)) expect_true(!anyNA(vlabels(dt))) expect_silent(dt[, new := 1]) expect_output(print(dt)) } for(i in 1:n) { m <- qM(mtcars) dt <- qDT(m) expect_identical(names(dt), names(mtcars)) expect_silent(dt[, new := 1]) expect_output(print(dt)) } expect_output(print(mtcDT[, qDT(pwcor(.SD)), by = cyl, .SDcols = c("mpg", "hp", "carb")])) expect_output(print(melt(qDT(GGDC10S)[, qDT(pwcor(.SD)), by = .(Variable, Country), .SDcols = 6:15], 1:2))) for(i in 1:n) { dt <- as_character_factor(wldDT) expect_identical(names(dt), names(wlddev)) expect_true(!anyNA(vlabels(dt))) expect_silent(dt[, new := 1]) expect_output(print(dt)) } for(i in 1:n) { dt <- as_character_factor(wldDT, keep.attr = FALSE) expect_identical(names(dt), names(wlddev)) expect_true(anyNA(vlabels(dt))) expect_silent(dt[, new := 1]) expect_output(print(dt)) } options(warn = -1L) for(i in 1:n) { dt <- as_numeric_factor(wldDT) expect_identical(names(dt), names(wlddev)) expect_true(!anyNA(vlabels(dt))) expect_silent(dt[, new := 1]) expect_output(print(dt)) } for(i in 1:n) { dt <- as_numeric_factor(wldDT, keep.attr = FALSE) expect_identical(names(dt), names(wlddev)) expect_true(anyNA(vlabels(dt))) expect_silent(dt[, new := 1]) expect_output(print(dt)) } options(warn = 1L) for(i in 1:n) { dt <- collap(wldDT, ~ iso3c) expect_identical(names(dt), names(wlddev)) expect_silent(dt[, new := 1]) expect_output(print(dt)) } for(i in 1:n) { dt <- collapv(wldDT, 1) expect_identical(names(dt), names(wlddev)) expect_silent(dt[, new := 1]) expect_output(print(dt)) } for(i in 1:n) { dt <- collapg(gby(wldDT, 1)) expect_silent(dt[, new := 1]) expect_output(print(dt)) } for(i in 1:n) { dt <- dapply(copy(mtcDT), log) expect_identical(names(dt), names(mtcars)) expect_silent(dt[, new := 1]) expect_output(print(dt)) } for(i in 1:n) { dt <- dapply(copy(mtcDT), log, return = "data.frame") expect_identical(names(dt), names(mtcars)) expect_error(dt[, new := 1]) expect_output(print(dt)) } for(i in 1:n) { l <- rsplit(copy(mtcDT), ~cyl) expect_silent(for(i in seq_along(l)) l[[i]][, new := 1]) expect_output(print(l)) expect_output(print(l[[1]])) } for(i in 1:n) { dt <- unlist2d(l, DT = TRUE) expect_silent(dt[, new45 := 1]) expect_output(print(dt)) } for(i in 1:n) { dt <- na_omit(copy(mtcDT), cols = 1:2) expect_identical(names(dt), names(mtcars)) expect_silent(dt[, new := 1]) expect_output(print(dt)) } for(i in 1:n) { dt <- na_omit(copy(mtcDT)) expect_identical(names(dt), names(mtcars)) expect_silent(dt[, new := 1]) expect_output(print(dt)) } for(i in 1:n) { dt <- na_insert(copy(mtcDT)) expect_identical(names(dt), names(mtcars)) expect_silent(dt[, new := 1]) expect_output(print(dt)) } for(i in 1:n) { dt <- copy(wldDT) vlabels(wldDT) <- NULL expect_silent(dt[, new := 1]) expect_output(print(dt)) } for(i in 1:n) { dt <- copy(mtcDT) %>% add_stub("B") expect_silent(dt[, new := 1]) expect_output(print(dt)) } for(i in 1:n) { dt <- copy(mtcDT) %>% add_stub("B") %>% rm_stub("B") expect_identical(names(dt), names(mtcars)) expect_silent(dt[, new := 1]) expect_output(print(dt)) } for(i in 1:n) { dt <- copy(mtcDT) %>% setRownames expect_identical(names(dt), names(mtcars)) expect_silent(dt[, new := 1]) expect_output(print(dt)) } for(i in 1:n) { dt <- copy(mtcDT) %>% frename(toupper) %>% setColnames(names(mtcars)) expect_identical(names(dt), names(mtcars)) expect_silent(dt[, new := 1]) expect_output(print(dt)) } for(i in 1:n) { dt <- replace_NA(copy(wldDT), cols = is.numeric) expect_silent(dt[, new := 1]) expect_output(print(dt)) } for(i in 1:n) { dt <- replace_NA(copy(mtcDT), set = TRUE, cols = is.numeric) expect_silent(dt[, new := 1]) expect_output(print(dt)) } for(i in 1:n) { dt <- replace_Inf(copy(wldDT)) expect_silent(dt[, new := 1]) expect_output(print(dt)) } for(i in 1:n) { dt <- replace_outliers(copy(wldDT), 3) expect_silent(dt[, new := 1]) expect_output(print(dt)) } for(i in 1:n) { dt <- recode_num(copy(wldDT), `1` = 2) expect_silent(dt[, new := 1]) expect_output(print(dt)) } for(i in 1:n) { dt <- recode_char(copy(wldDT), Uganda = "UGA") expect_silent(dt[, new := 1]) expect_output(print(dt)) } for(i in 1:n) { dt <- pad(copy(mtcDT), 1:3) expect_silent(dt[, new := 1]) expect_output(print(dt)) } }) collapse/tests/testthat/test-fmin-fmax.R0000644000176200001440000004571014167365243020062 0ustar liggesuserscontext("fmin and fmax") bmin <- base::min bmax <- base::max # rm(list = ls()) set.seed(101) x <- rnorm(100) * 10000 xNA <- x xNA[sample.int(100,20)] <- NA f <- as.factor(sample.int(10, 100, TRUE)) g <- GRP(mtcars, ~ cyl + vs + am) mtcNA <- na_insert(mtcars) mtcNA[27,1] <- NA # single group NA !! m <- as.matrix(mtcars) mNA <- as.matrix(mtcNA) mNAc <- mNA storage.mode(mNAc) <- "character" inf2NA <- function(x) { if(is.atomic(x)) { x[is.infinite(x)] <- NA } else { x[do.call(cbind, lapply(x, is.infinite))] <- NA } x } options(warn = -1) # fmin double test_that("fmin performs like base::min", { expect_equal(fmin(NA), bmin(NA)) expect_equal(fmin(NA, na.rm = FALSE), bmin(NA)) expect_equal(fmin(1), bmin(1, na.rm = TRUE)) expect_equal(fmin(1:3), bmin(1:3, na.rm = TRUE)) expect_equal(fmin(-1:1), bmin(-1:1, na.rm = TRUE)) expect_equal(fmin(1, na.rm = FALSE), bmin(1)) expect_equal(fmin(1:3, na.rm = FALSE), bmin(1:3)) expect_equal(fmin(-1:1, na.rm = FALSE), bmin(-1:1)) expect_equal(fmin(x), bmin(x, na.rm = TRUE)) expect_equal(fmin(x, na.rm = FALSE), bmin(x)) expect_equal(fmin(xNA, na.rm = FALSE), bmin(xNA)) expect_equal(fmin(xNA), bmin(xNA, na.rm = TRUE)) expect_equal(fmin(mtcars), fmin(m)) expect_equal(fmin(m), dapply(m, bmin, na.rm = TRUE)) expect_equal(fmin(m, na.rm = FALSE), dapply(m, bmin)) expect_equal(fmin(mNA, na.rm = FALSE), dapply(mNA, bmin)) expect_equal(fmin(mNA), dapply(mNA, bmin, na.rm = TRUE)) expect_equal(fmin(mtcars), dapply(mtcars, bmin, na.rm = TRUE)) expect_equal(fmin(mtcars, na.rm = FALSE), dapply(mtcars, bmin)) expect_equal(fmin(mtcNA, na.rm = FALSE), dapply(mtcNA, bmin)) expect_equal(fmin(mtcNA), dapply(mtcNA, bmin, na.rm = TRUE)) expect_equal(fmin(x, f), BY(x, f, bmin, na.rm = TRUE)) expect_equal(fmin(x, f, na.rm = FALSE), BY(x, f, bmin)) expect_equal(fmin(xNA, f, na.rm = FALSE), BY(xNA, f, bmin)) expect_equal(fmin(xNA, f), inf2NA(BY(xNA, f, bmin, na.rm = TRUE))) expect_equal(fmin(m, g), BY(m, g, bmin, na.rm = TRUE)) expect_equal(fmin(m, g, na.rm = FALSE), BY(m, g, bmin)) expect_equal(fmin(mNA, g, na.rm = FALSE), BY(mNA, g, bmin)) expect_equal(fmin(mNA, g), inf2NA(BY(mNA, g, bmin, na.rm = TRUE))) # bmin(NA, na.rm = TRUE) gives Inf expect_equal(fmin(mtcars, g), BY(mtcars, g, bmin, na.rm = TRUE)) expect_equal(fmin(mtcars, g, na.rm = FALSE), BY(mtcars, g, bmin)) expect_equal(fmin(mtcNA, g, na.rm = FALSE), BY(mtcNA, g, bmin)) expect_equal(fmin(mtcNA, g), inf2NA(BY(mtcNA, g, bmin, na.rm = TRUE))) # bmin(NA, na.rm = TRUE) gives Inf }) test_that("fmin performs numerically stable", { expect_true(all_obj_equal(replicate(50, fmin(1), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmin(NA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmin(NA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmin(x), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmin(x, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmin(xNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmin(xNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmin(m), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmin(m, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmin(mNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmin(mNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmin(mtcars), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmin(mtcars, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmin(mtcNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmin(mtcNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmin(x, f), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmin(x, f, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmin(xNA, f, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmin(xNA, f), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmin(m, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmin(m, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmin(mNA, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmin(mNA, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmin(mtcars, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmin(mtcars, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmin(mtcNA, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmin(mtcNA, g), simplify = FALSE))) }) test_that("fmin handles special values in the right way", { expect_equal(fmin(NA), NA_real_) expect_equal(fmin(NaN), NaN) expect_equal(fmin(Inf), Inf) expect_equal(fmin(-Inf), -Inf) expect_equal(fmin(TRUE), 1) expect_equal(fmin(FALSE), 0) expect_equal(fmin(NA, na.rm = FALSE), NA_real_) expect_equal(fmin(NaN, na.rm = FALSE), NaN) expect_equal(fmin(Inf, na.rm = FALSE), Inf) expect_equal(fmin(-Inf, na.rm = FALSE), -Inf) expect_equal(fmin(TRUE, na.rm = FALSE), 1) expect_equal(fmin(FALSE, na.rm = FALSE), 0) }) test_that("fmin produces errors for wrong input", { expect_error(fmin("a")) expect_error(fmin(NA_character_)) expect_error(fmin(mNAc)) expect_error(fmin(mNAc, f)) expect_error(fmin(1:2,1:3)) expect_error(fmin(m,1:31)) expect_error(fmin(mtcars,1:31)) expect_error(fmin(wlddev)) expect_error(fmin(wlddev, wlddev$iso3c)) }) # fmax double test_that("fmax performs like base::max", { expect_equal(fmax(NA), bmax(NA)) expect_equal(fmax(NA, na.rm = FALSE), bmax(NA)) expect_equal(fmax(1), bmax(1, na.rm = TRUE)) expect_equal(fmax(1:3), bmax(1:3, na.rm = TRUE)) expect_equal(fmax(-1:1), bmax(-1:1, na.rm = TRUE)) expect_equal(fmax(1, na.rm = FALSE), bmax(1)) expect_equal(fmax(1:3, na.rm = FALSE), bmax(1:3)) expect_equal(fmax(-1:1, na.rm = FALSE), bmax(-1:1)) expect_equal(fmax(x), bmax(x, na.rm = TRUE)) expect_equal(fmax(x, na.rm = FALSE), bmax(x)) expect_equal(fmax(xNA, na.rm = FALSE), bmax(xNA)) expect_equal(fmax(xNA), bmax(xNA, na.rm = TRUE)) expect_equal(fmax(mtcars), fmax(m)) expect_equal(fmax(m), dapply(m, bmax, na.rm = TRUE)) expect_equal(fmax(m, na.rm = FALSE), dapply(m, bmax)) expect_equal(fmax(mNA, na.rm = FALSE), dapply(mNA, bmax)) expect_equal(fmax(mNA), dapply(mNA, bmax, na.rm = TRUE)) expect_equal(fmax(mtcars), dapply(mtcars, bmax, na.rm = TRUE)) expect_equal(fmax(mtcars, na.rm = FALSE), dapply(mtcars, bmax)) expect_equal(fmax(mtcNA, na.rm = FALSE), dapply(mtcNA, bmax)) expect_equal(fmax(mtcNA), dapply(mtcNA, bmax, na.rm = TRUE)) expect_equal(fmax(x, f), BY(x, f, bmax, na.rm = TRUE)) expect_equal(fmax(x, f, na.rm = FALSE), BY(x, f, bmax)) expect_equal(fmax(xNA, f, na.rm = FALSE), BY(xNA, f, bmax)) expect_equal(fmax(xNA, f), inf2NA(BY(xNA, f, bmax, na.rm = TRUE))) expect_equal(fmax(m, g), BY(m, g, bmax, na.rm = TRUE)) expect_equal(fmax(m, g, na.rm = FALSE), BY(m, g, bmax)) expect_equal(fmax(mNA, g, na.rm = FALSE), BY(mNA, g, bmax)) expect_equal(fmax(mNA, g), inf2NA(BY(mNA, g, bmax, na.rm = TRUE))) # bmax(NA, na.rm = TRUE) gives -Inf expect_equal(fmax(mtcars, g), BY(mtcars, g, bmax, na.rm = TRUE)) expect_equal(fmax(mtcars, g, na.rm = FALSE), BY(mtcars, g, bmax)) expect_equal(fmax(mtcNA, g, na.rm = FALSE), BY(mtcNA, g, bmax)) expect_equal(fmax(mtcNA, g), inf2NA(BY(mtcNA, g, bmax, na.rm = TRUE))) # bmax(NA, na.rm = TRUE) gives -Inf }) test_that("fmax performs numerically stable", { expect_true(all_obj_equal(replicate(50, fmax(1), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmax(NA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmax(NA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmax(x), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmax(x, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmax(xNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmax(xNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmax(m), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmax(m, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmax(mNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmax(mNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmax(mtcars), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmax(mtcars, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmax(mtcNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmax(mtcNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmax(x, f), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmax(x, f, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmax(xNA, f, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmax(xNA, f), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmax(m, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmax(m, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmax(mNA, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmax(mNA, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmax(mtcars, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmax(mtcars, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmax(mtcNA, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmax(mtcNA, g), simplify = FALSE))) }) test_that("fmax handles special values in the right way", { expect_equal(fmax(NA), NA_real_) expect_equal(fmax(NaN), NaN) expect_equal(fmax(Inf), Inf) expect_equal(fmax(-Inf), -Inf) expect_equal(fmax(TRUE), 1) expect_equal(fmax(FALSE), 0) expect_equal(fmax(NA, na.rm = FALSE), NA_real_) expect_equal(fmax(NaN, na.rm = FALSE), NaN) expect_equal(fmax(Inf, na.rm = FALSE), Inf) expect_equal(fmax(-Inf, na.rm = FALSE), -Inf) expect_equal(fmax(TRUE, na.rm = FALSE), 1) expect_equal(fmax(FALSE, na.rm = FALSE), 0) }) test_that("fmax produces errors for wrong input", { expect_error(fmax("a")) expect_error(fmax(NA_character_)) expect_error(fmax(mNAc)) expect_error(fmax(mNAc, f)) expect_error(fmax(1:2,1:3)) expect_error(fmax(m,1:31)) expect_error(fmax(mtcars,1:31)) expect_error(fmax(wlddev)) expect_error(fmax(wlddev, wlddev$iso3c)) }) # fmin int x <- as.integer(x) xNA <- as.integer(xNA) mtcNA <- dapply(mtcNA, as.integer) mtcars <- dapply(mtcars, as.integer) storage.mode(m) <- "integer" storage.mode(mNA) <- "integer" toint <- function(x) { storage.mode(x) <- "integer" x } test_that("fmin with integers performs like base::min", { expect_identical(fmin(x), bmin(x, na.rm = TRUE)) expect_identical(fmin(x, na.rm = FALSE), bmin(x)) expect_identical(fmin(xNA, na.rm = FALSE), bmin(xNA)) expect_identical(fmin(xNA), bmin(xNA, na.rm = TRUE)) expect_identical(toint(fmin(mtcars)), fmin(m)) expect_identical(fmin(m), dapply(m, bmin, na.rm = TRUE)) expect_identical(fmin(m, na.rm = FALSE), dapply(m, bmin)) expect_identical(fmin(mNA, na.rm = FALSE), dapply(mNA, bmin)) expect_identical(fmin(mNA), dapply(mNA, bmin, na.rm = TRUE)) expect_identical(toint(fmin(mtcars)), dapply(mtcars, bmin, na.rm = TRUE)) expect_identical(toint(fmin(mtcars, na.rm = FALSE)), dapply(mtcars, bmin)) expect_identical(toint(fmin(mtcNA, na.rm = FALSE)), dapply(mtcNA, bmin)) expect_identical(toint(fmin(mtcNA)), dapply(mtcNA, bmin, na.rm = TRUE)) expect_identical(fmin(x, f), BY(x, f, bmin, na.rm = TRUE)) expect_identical(fmin(x, f, na.rm = FALSE), BY(x, f, bmin)) expect_identical(fmin(xNA, f, na.rm = FALSE), BY(xNA, f, bmin)) expect_identical(fmin(xNA, f), inf2NA(BY(xNA, f, bmin, na.rm = TRUE))) expect_identical(fmin(m, g), BY(m, g, bmin, na.rm = TRUE)) expect_identical(fmin(m, g, na.rm = FALSE), BY(m, g, bmin)) expect_identical(fmin(mNA, g, na.rm = FALSE), BY(mNA, g, bmin)) expect_identical(fmin(mNA, g), toint(inf2NA(BY(mNA, g, bmin, na.rm = TRUE)))) # bmin(NA, na.rm = TRUE) gives Inf expect_identical(fmin(mtcars, g), BY(mtcars, g, bmin, na.rm = TRUE)) expect_identical(fmin(mtcars, g, na.rm = FALSE), BY(mtcars, g, bmin)) expect_identical(fmin(mtcNA, g, na.rm = FALSE), BY(mtcNA, g, bmin)) expect_identical(fmin(mtcNA, g), dapply(inf2NA(BY(mtcNA, g, bmin, na.rm = TRUE)), toint)) # bmin(NA, na.rm = TRUE) gives Inf }) test_that("fmin with integers performs numerically stable", { expect_true(all_identical(replicate(50, fmin(x), simplify = FALSE))) expect_true(all_identical(replicate(50, fmin(x, na.rm = FALSE), simplify = FALSE))) expect_true(all_identical(replicate(50, fmin(xNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_identical(replicate(50, fmin(xNA), simplify = FALSE))) expect_true(all_identical(replicate(50, fmin(m), simplify = FALSE))) expect_true(all_identical(replicate(50, fmin(m, na.rm = FALSE), simplify = FALSE))) expect_true(all_identical(replicate(50, fmin(mNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_identical(replicate(50, fmin(mNA), simplify = FALSE))) expect_true(all_identical(replicate(50, fmin(mtcars), simplify = FALSE))) expect_true(all_identical(replicate(50, fmin(mtcars, na.rm = FALSE), simplify = FALSE))) expect_true(all_identical(replicate(50, fmin(mtcNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_identical(replicate(50, fmin(mtcNA), simplify = FALSE))) expect_true(all_identical(replicate(50, fmin(x, f), simplify = FALSE))) expect_true(all_identical(replicate(50, fmin(x, f, na.rm = FALSE), simplify = FALSE))) expect_true(all_identical(replicate(50, fmin(xNA, f, na.rm = FALSE), simplify = FALSE))) expect_true(all_identical(replicate(50, fmin(xNA, f), simplify = FALSE))) expect_true(all_identical(replicate(50, fmin(m, g), simplify = FALSE))) expect_true(all_identical(replicate(50, fmin(m, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_identical(replicate(50, fmin(mNA, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_identical(replicate(50, fmin(mNA, g), simplify = FALSE))) expect_true(all_identical(replicate(50, fmin(mtcars, g), simplify = FALSE))) expect_true(all_identical(replicate(50, fmin(mtcars, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_identical(replicate(50, fmin(mtcNA, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_identical(replicate(50, fmin(mtcNA, g), simplify = FALSE))) }) test_that("fmin with integers produces errors for wrong input", { expect_error(fmin(m,1:31)) expect_error(fmin(mtcars,1:31)) }) # fmax int test_that("fmax with integers performs like base::max", { expect_identical(fmax(x), bmax(x, na.rm = TRUE)) expect_identical(fmax(x, na.rm = FALSE), bmax(x)) expect_identical(fmax(xNA, na.rm = FALSE), bmax(xNA)) expect_identical(fmax(xNA), bmax(xNA, na.rm = TRUE)) expect_identical(toint(fmax(mtcars)), fmax(m)) expect_identical(fmax(m), dapply(m, bmax, na.rm = TRUE)) expect_identical(fmax(m, na.rm = FALSE), dapply(m, bmax)) expect_identical(fmax(mNA, na.rm = FALSE), dapply(mNA, bmax)) expect_identical(fmax(mNA), dapply(mNA, bmax, na.rm = TRUE)) expect_identical(toint(fmax(mtcars)), dapply(mtcars, bmax, na.rm = TRUE)) expect_identical(toint(fmax(mtcars, na.rm = FALSE)), dapply(mtcars, bmax)) expect_identical(toint(fmax(mtcNA, na.rm = FALSE)), dapply(mtcNA, bmax)) expect_identical(toint(fmax(mtcNA)), dapply(mtcNA, bmax, na.rm = TRUE)) expect_identical(fmax(x, f), BY(x, f, bmax, na.rm = TRUE)) expect_identical(fmax(x, f, na.rm = FALSE), BY(x, f, bmax)) expect_identical(fmax(xNA, f, na.rm = FALSE), BY(xNA, f, bmax)) expect_identical(fmax(xNA, f), inf2NA(BY(xNA, f, bmax, na.rm = TRUE))) expect_identical(fmax(m, g), BY(m, g, bmax, na.rm = TRUE)) expect_identical(fmax(m, g, na.rm = FALSE), BY(m, g, bmax)) expect_identical(fmax(mNA, g, na.rm = FALSE), BY(mNA, g, bmax)) expect_identical(fmax(mNA, g), toint(inf2NA(BY(mNA, g, bmax, na.rm = TRUE)))) # bmax(NA, na.rm = TRUE) gives -Inf expect_identical(fmax(mtcars, g), BY(mtcars, g, bmax, na.rm = TRUE)) expect_identical(fmax(mtcars, g, na.rm = FALSE), BY(mtcars, g, bmax)) expect_identical(fmax(mtcNA, g, na.rm = FALSE), BY(mtcNA, g, bmax)) expect_identical(fmax(mtcNA, g), dapply(inf2NA(BY(mtcNA, g, bmax, na.rm = TRUE)), toint)) # bmax(NA, na.rm = TRUE) gives -Inf }) test_that("fmax with integers performs numerically stable", { expect_true(all_identical(replicate(50, fmax(x), simplify = FALSE))) expect_true(all_identical(replicate(50, fmax(x, na.rm = FALSE), simplify = FALSE))) expect_true(all_identical(replicate(50, fmax(xNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_identical(replicate(50, fmax(xNA), simplify = FALSE))) expect_true(all_identical(replicate(50, fmax(m), simplify = FALSE))) expect_true(all_identical(replicate(50, fmax(m, na.rm = FALSE), simplify = FALSE))) expect_true(all_identical(replicate(50, fmax(mNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_identical(replicate(50, fmax(mNA), simplify = FALSE))) expect_true(all_identical(replicate(50, fmax(mtcars), simplify = FALSE))) expect_true(all_identical(replicate(50, fmax(mtcars, na.rm = FALSE), simplify = FALSE))) expect_true(all_identical(replicate(50, fmax(mtcNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_identical(replicate(50, fmax(mtcNA), simplify = FALSE))) expect_true(all_identical(replicate(50, fmax(x, f), simplify = FALSE))) expect_true(all_identical(replicate(50, fmax(x, f, na.rm = FALSE), simplify = FALSE))) expect_true(all_identical(replicate(50, fmax(xNA, f, na.rm = FALSE), simplify = FALSE))) expect_true(all_identical(replicate(50, fmax(xNA, f), simplify = FALSE))) expect_true(all_identical(replicate(50, fmax(m, g), simplify = FALSE))) expect_true(all_identical(replicate(50, fmax(m, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_identical(replicate(50, fmax(mNA, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_identical(replicate(50, fmax(mNA, g), simplify = FALSE))) expect_true(all_identical(replicate(50, fmax(mtcars, g), simplify = FALSE))) expect_true(all_identical(replicate(50, fmax(mtcars, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_identical(replicate(50, fmax(mtcNA, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_identical(replicate(50, fmax(mtcNA, g), simplify = FALSE))) }) test_that("fmax with integers produces errors for wrong input", { expect_error(fmax(m,1:31)) expect_error(fmax(mtcars,1:31)) }) options(warn = 1) collapse/tests/testthat/test-fdiff-fgrowth-D-G.R0000644000176200001440000021610714166276767021314 0ustar liggesuserscontext("fdiff / D and fgrowth / G") # rm(list = ls()) # TODO: test computations on irregular time series and panels set.seed(101) x <- abs(10*rnorm(100)) xNA <- x xNA[sample.int(100, 20)] <- NA f <- as.factor(rep(1:10, each = 10)) t <- as.factor(rep(1:10, 10)) data <- setRownames(wlddev[wlddev$iso3c %in% c("BLZ","IND","USA","SRB","GRL"), ]) g <- GRP(droplevels(data$iso3c)) td <- as.factor(data$year) data <- num_vars(data) dataNA <- na_insert(data) m <- qM(data) mNA <- qM(dataNA) mNAc <- mNA storage.mode(mNAc) <- "character" # Creatung unordered data: o = order(rnorm(100)) xuo = x[o] xNAuo = xNA[o] fuo = f[o] tuo = t[o] t2uo = seq_len(100)[o] o = order(o) od = order(rnorm(length(td))) muo = m[od, ] datauo = data[od, ] guo = as_factor_GRP(g)[od] tduo = td[od] t2duo = seq_along(od)[od] od = order(od) basediff <- function(x, n = 1, diff = 1) c(rep(NA_real_, n * diff), diff.default(x, n, diff)) baselogdiff <- function(x, n = 1) c(rep(NA_real_, n), diff.default(log(x), n)*100) basegrowth <- function(x, n = 1) c(rep(NA_real_, n), diff.default(x, n)/x[1:(length(x)-n)]*100) # fdiff test_that("fdiff performs like basediff", { expect_equal(fdiff(1:10), basediff(1:10)) expect_equal(fdiff(1:10, 2), basediff(1:10, 2)) expect_equal(fdiff(1:10, 1, 2), basediff(1:10, 1, 2)) expect_equal(fdiff(1:10, 2, 2), basediff(1:10, 2, 2)) expect_equal(fdiff(-1:1), basediff(-1:1)) expect_equal(fdiff(x), basediff(x)) expect_equal(fdiff(x, 2, 2), basediff(x, 2, 2)) expect_equal(fdiff(xNA), basediff(xNA)) expect_equal(fdiff(xNA, 2, 2), basediff(xNA, 2, 2)) expect_equal(qM(fdiff(data)), setRownames(fdiff(m), NULL)) expect_equal(fdiff(m, stubs = FALSE), dapply(m, basediff)) expect_equal(fdiff(m, 2, 2, stubs = FALSE), dapply(m, basediff, 2, 2)) expect_equal(fdiff(mNA, stubs = FALSE), dapply(mNA, basediff)) expect_equal(fdiff(mNA, 2, 2, stubs = FALSE), dapply(mNA, basediff, 2, 2)) expect_equal(fdiff(data, stubs = FALSE), dapply(data, basediff)) expect_equal(fdiff(data, 2, 2, stubs = FALSE), dapply(data, basediff, 2, 2)) expect_equal(fdiff(dataNA, stubs = FALSE), dapply(dataNA, basediff)) expect_equal(fdiff(dataNA, 2, 2, stubs = FALSE), dapply(dataNA, basediff, 2, 2)) expect_equal(fdiff(x, 1, 1, f), BY(x, f, basediff, use.g.names = FALSE)) expect_equal(fdiff(x, 2, 2, f), BY(x, f, basediff, 2, 2, use.g.names = FALSE)) expect_equal(fdiff(xNA, 1, 1, f), BY(xNA, f, basediff, use.g.names = FALSE)) expect_equal(fdiff(xNA, 2, 2, f), BY(xNA, f, basediff, 2, 2, use.g.names = FALSE)) expect_equal(fdiff(m, 1, 1, g, stubs = FALSE), BY(m, g, basediff, use.g.names = FALSE)) expect_equal(fdiff(m, 2, 2, g, stubs = FALSE), BY(m, g, basediff, 2, 2, use.g.names = FALSE)) expect_equal(fdiff(mNA, 1, 1, g, stubs = FALSE), BY(mNA, g, basediff, use.g.names = FALSE)) expect_equal(fdiff(mNA, 2, 2, g, stubs = FALSE), BY(mNA, g, basediff, 2, 2, use.g.names = FALSE)) expect_equal(fdiff(data, 1, 1, g, stubs = FALSE), BY(data, g, basediff, use.g.names = FALSE)) expect_equal(fdiff(data, 2, 2, g, stubs = FALSE), BY(data, g, basediff, 2, 2, use.g.names = FALSE)) expect_equal(fdiff(dataNA, 1, 1, g, stubs = FALSE), BY(dataNA, g, basediff, use.g.names = FALSE)) expect_equal(fdiff(dataNA, 2, 2, g, stubs = FALSE), BY(dataNA, g, basediff, 2, 2, use.g.names = FALSE)) # Adding time-variable: Computing fully identified panel-differences !! expect_equal(fdiff(x, 1, 1, f, t), BY(x, f, basediff, use.g.names = FALSE)) expect_equal(fdiff(x, 2, 2, f, t), BY(x, f, basediff, 2, 2, use.g.names = FALSE)) expect_equal(fdiff(xNA, 1, 1, f, t), BY(xNA, f, basediff, use.g.names = FALSE)) expect_equal(fdiff(xNA, 2, 2, f, t), BY(xNA, f, basediff, 2, 2, use.g.names = FALSE)) expect_equal(fdiff(m, 1, 1, g, td, stubs = FALSE), BY(m, g, basediff, use.g.names = FALSE)) expect_equal(fdiff(m, 2, 2, g, td, stubs = FALSE), BY(m, g, basediff, 2, 2, use.g.names = FALSE)) expect_equal(fdiff(mNA, 1, 1, g, td, stubs = FALSE), BY(mNA, g, basediff, use.g.names = FALSE)) expect_equal(fdiff(mNA, 2, 2, g, td, stubs = FALSE), BY(mNA, g, basediff, 2, 2, use.g.names = FALSE)) expect_equal(fdiff(data, 1, 1, g, td, stubs = FALSE), BY(data, g, basediff, use.g.names = FALSE)) expect_equal(fdiff(data, 2, 2, g, td, stubs = FALSE), BY(data, g, basediff, 2, 2, use.g.names = FALSE)) expect_equal(fdiff(dataNA, 1, 1, g, td, stubs = FALSE), BY(dataNA, g, basediff, use.g.names = FALSE)) expect_equal(fdiff(dataNA, 2, 2, g, td, stubs = FALSE), BY(dataNA, g, basediff, 2, 2, use.g.names = FALSE)) }) test_that("fdiff performs lagged/leaded and iterated (panel-) vector differences without errors", { expect_visible(fdiff(1:10, -2:2)) expect_visible(fdiff(1:10, 1:2)) expect_visible(fdiff(1:10, -1:-2)) expect_visible(fdiff(1:10, 0)) expect_visible(fdiff(1:10, -2:2, 2)) expect_visible(fdiff(1:10, 1:2, 2)) expect_visible(fdiff(1:10, -1:-2, 2)) expect_visible(fdiff(1:10, 0, 2)) expect_visible(fdiff(1:10, -2:2, 1:2)) expect_visible(fdiff(1:10, 1:2, 1:2)) expect_visible(fdiff(1:10, -1:-2, 1:2)) expect_visible(fdiff(1:10, 0, 1:2)) expect_visible(fdiff(xNA, -2:2)) expect_visible(fdiff(xNA, 1:2)) expect_visible(fdiff(xNA, -1:-2)) expect_visible(fdiff(xNA, 0)) expect_visible(fdiff(xNA, -2:2, 2)) expect_visible(fdiff(xNA, 1:2, 2)) expect_visible(fdiff(xNA, -1:-2, 2)) expect_visible(fdiff(xNA, 0, 2)) expect_visible(fdiff(xNA, -2:2, 1:2)) expect_visible(fdiff(xNA, 1:2, 1:2)) expect_visible(fdiff(xNA, -1:-2, 1:2)) expect_visible(fdiff(xNA, 0, 1:2)) expect_visible(fdiff(xNA, -2:2, 1, f)) expect_visible(fdiff(xNA, 1:2, 1, f)) expect_visible(fdiff(xNA, -1:-2, 1, f)) expect_visible(fdiff(xNA, 0, 1, f)) expect_visible(fdiff(xNA, -2:2, 2, f)) expect_visible(fdiff(xNA, 1:2, 2, f)) expect_visible(fdiff(xNA, -1:-2, 2, f)) expect_visible(fdiff(xNA, 0, 2, f)) expect_visible(fdiff(xNA, -2:2, 1:2, f)) expect_visible(fdiff(xNA, 1:2, 1:2, f)) expect_visible(fdiff(xNA, -1:-2, 1:2, f)) expect_visible(fdiff(xNA, 0, 1:2, f)) expect_visible(fdiff(xNA, -2:2, 1, f, t)) expect_visible(fdiff(xNA, 1:2, 1, f, t)) expect_visible(fdiff(xNA, -1:-2, 1, f, t)) expect_visible(fdiff(xNA, 0, 1, f, t)) expect_visible(fdiff(xNA, -2:2, 2, f, t)) expect_visible(fdiff(xNA, 1:2, 2, f, t)) expect_visible(fdiff(xNA, -1:-2, 2, f, t)) expect_visible(fdiff(xNA, 0, 2, f, t)) expect_visible(fdiff(xNA, -2:2, 1:2, f, t)) expect_visible(fdiff(xNA, 1:2, 1:2, f, t)) expect_visible(fdiff(xNA, -1:-2, 1:2, f, t)) expect_visible(fdiff(xNA, 0, 1:2, f, t)) }) test_that("fdiff performs lagged/leaded and iterated (panel-) matrix differences without errors", { expect_visible(fdiff(m, -2:2)) expect_visible(fdiff(m, 1:2)) expect_visible(fdiff(m, -1:-2)) expect_visible(fdiff(m, 0)) expect_visible(fdiff(m, -2:2, 2)) expect_visible(fdiff(m, 1:2, 2)) expect_visible(fdiff(m, -1:-2, 2)) expect_visible(fdiff(m, 0, 2)) expect_visible(fdiff(m, -2:2, 1:2)) expect_visible(fdiff(m, 1:2, 1:2)) expect_visible(fdiff(m, -1:-2, 1:2)) expect_visible(fdiff(m, 0, 1:2)) expect_visible(fdiff(m, -2:2, 1, g)) expect_visible(fdiff(m, 1:2, 1, g)) expect_visible(fdiff(m, -1:-2, 1, g)) expect_visible(fdiff(m, 0, 1, g)) expect_visible(fdiff(m, -2:2, 2, g)) expect_visible(fdiff(m, 1:2, 2, g)) expect_visible(fdiff(m, -1:-2, 2, g)) expect_visible(fdiff(m, 0, 2, g)) expect_visible(fdiff(m, -2:2, 1:2, g)) expect_visible(fdiff(m, 1:2, 1:2, g)) expect_visible(fdiff(m, -1:-2, 1:2, g)) expect_visible(fdiff(m, 0, 1:2, g)) expect_visible(fdiff(m, -2:2, 1, g, td)) expect_visible(fdiff(m, 1:2, 1, g, td)) expect_visible(fdiff(m, -1:-2, 1, g, td)) expect_visible(fdiff(m, 0, 1, g, td)) expect_visible(fdiff(m, -2:2, 2, g, td)) expect_visible(fdiff(m, 1:2, 2, g, td)) expect_visible(fdiff(m, -1:-2, 2, g, td)) expect_visible(fdiff(m, 0, 2, g, td)) expect_visible(fdiff(m, -2:2, 1:2, g, td)) expect_visible(fdiff(m, 1:2, 1:2, g, td)) expect_visible(fdiff(m, -1:-2, 1:2, g, td)) expect_visible(fdiff(m, 0, 1:2, g, td)) }) test_that("fdiff performs lagged/leaded and iterated (panel-) data.frame differences without errors", { expect_visible(fdiff(data, -2:2)) expect_visible(fdiff(data, 1:2)) expect_visible(fdiff(data, -1:-2)) expect_visible(fdiff(data, 0)) expect_visible(fdiff(data, -2:2, 2)) expect_visible(fdiff(data, 1:2, 2)) expect_visible(fdiff(data, -1:-2, 2)) expect_visible(fdiff(data, 0, 2)) expect_visible(fdiff(data, -2:2, 1:2)) expect_visible(fdiff(data, 1:2, 1:2)) expect_visible(fdiff(data, -1:-2, 1:2)) expect_visible(fdiff(data, 0, 1:2)) expect_visible(fdiff(data, -2:2, 1, g)) expect_visible(fdiff(data, 1:2, 1, g)) expect_visible(fdiff(data, -1:-2, 1, g)) expect_visible(fdiff(data, 0, 1, g)) expect_visible(fdiff(data, -2:2, 2, g)) expect_visible(fdiff(data, 1:2, 2, g)) expect_visible(fdiff(data, -1:-2, 2, g)) expect_visible(fdiff(data, 0, 2, g)) expect_visible(fdiff(data, -2:2, 1:2, g)) expect_visible(fdiff(data, 1:2, 1:2, g)) expect_visible(fdiff(data, -1:-2, 1:2, g)) expect_visible(fdiff(data, 0, 1:2, g)) expect_visible(fdiff(data, -2:2, 1, g, td)) expect_visible(fdiff(data, 1:2, 1, g, td)) expect_visible(fdiff(data, -1:-2, 1, g, td)) expect_visible(fdiff(data, 0, 1, g, td)) expect_visible(fdiff(data, -2:2, 2, g, td)) expect_visible(fdiff(data, 1:2, 2, g, td)) expect_visible(fdiff(data, -1:-2, 2, g, td)) expect_visible(fdiff(data, 0, 2, g, td)) expect_visible(fdiff(data, -2:2, 1:2, g, td)) expect_visible(fdiff(data, 1:2, 1:2, g, td)) expect_visible(fdiff(data, -1:-2, 1:2, g, td)) expect_visible(fdiff(data, 0, 1:2, g, td)) }) test_that("fdiff correctly handles unordered time-series and panel-series computations", { expect_equal(fdiff(x, -2:2, 1:2, t = 1:100), fdiff(x, -2:2, 1:2)) expect_equal(fdiff(xNA, -2:2, 1:2, t = 1:100), fdiff(xNA, -2:2, 1:2)) expect_equal(fdiff(m, -2:2, 1:2, t = seq_along(td)), fdiff(m, -2:2, 1:2)) expect_equal(fdiff(data, -2:2, 1:2, t = seq_along(td)), fdiff(data, -2:2, 1:2)) expect_equal(fdiff(xuo, -2:2, 1:2, t = t2uo)[o,], unclass(fdiff(x, -2:2, 1:2))) expect_equal(fdiff(xNAuo, -2:2, 1:2, t = t2uo)[o,], unclass(fdiff(xNA, -2:2, 1:2))) expect_equal(fdiff(muo, -2:2, 1:2, t = t2duo)[od,], unclass(fdiff(m, -2:2, 1:2))) expect_equal(fdiff(datauo, -2:2, 1:2, t = t2duo)[od,], fdiff(data, -2:2, 1:2)) expect_equal(fdiff(xuo, -2:2, 1:2, fuo, tuo)[o,], unclass(fdiff(x, -2:2, 1:2, f, t))) expect_equal(fdiff(xNAuo, -2:2, 1:2, fuo, tuo)[o,], unclass(fdiff(xNA, -2:2, 1:2, f, t))) expect_equal(fdiff(muo, -2:2, 1:2, guo, tduo)[od,], unclass(fdiff(m, -2:2, 1:2, g, td))) expect_equal(fdiff(datauo, -2:2, 1:2, guo, tduo)[od,], fdiff(data, -2:2, 1:2, g, td)) }) test_that("fdiff performs numerically stable in ordered computations", { expect_true(all_obj_equal(replicate(50, fdiff(x), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fdiff(xNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fdiff(m), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fdiff(mNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fdiff(data), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fdiff(dataNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fdiff(x, 1, 1, f), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fdiff(x, -2:2, 1:2, f), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fdiff(xNA, 1, 1, f), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fdiff(xNA, -2:2, 1:2, f), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fdiff(m, 1, 1, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fdiff(m, -2:2, 1:2, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fdiff(mNA, 1, 1, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fdiff(mNA, -2:2, 1:2, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fdiff(data, 1, 1, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fdiff(data, -2:2, 1:2, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fdiff(dataNA, 1, 1, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fdiff(dataNA, -2:2, 1:2, g), simplify = FALSE))) }) test_that("fdiff performs numerically stable in unordered computations", { expect_true(all_obj_equal(replicate(50, fdiff(xuo, t = t2uo), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fdiff(xNAuo, t = t2uo), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fdiff(muo, t = t2duo), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fdiff(datauo, t = t2duo), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fdiff(xuo, 1, 1, fuo, tuo), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fdiff(xuo, -2:2, 1:2, fuo, tuo), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fdiff(muo, 1, 1, guo, tduo), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fdiff(muo, -2:2, 1:2, guo, tduo), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fdiff(datauo, 1, 1, guo, tduo), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fdiff(datauo, -2:2, 1:2, guo, tduo), simplify = FALSE))) }) test_that("fdiff handles special values in the right way", { expect_equal(fdiff(c(1,NA)), c(NA_real_,NA_real_)) expect_equal(fdiff(c(NA,1)), c(NA_real_,NA_real_)) expect_equal(fdiff(c(NaN,1)), c(NA_real_,NaN)) expect_equal(fdiff(c(1,NaN)), c(NA_real_,NaN)) expect_equal(fdiff(c(Inf,1)), c(NA,-Inf)) expect_equal(fdiff(c(1,Inf)), c(NA,Inf)) expect_equal(fdiff(c(Inf,NA)), c(NA_real_,NA_real_)) expect_equal(fdiff(c(NA,Inf)), c(NA_real_,NA_real_)) expect_equal(fdiff(c(Inf,-Inf)), c(NA,-Inf)) expect_equal(fdiff(c(-Inf,Inf)), c(NA,Inf)) expect_equal(fdiff(c(Inf,Inf)), c(NA,NaN)) expect_equal(fdiff(c(TRUE,TRUE)), c(NA_real_,0)) expect_equal(fdiff(c(TRUE,FALSE)), c(NA_real_,-1)) expect_equal(fdiff(c(FALSE,TRUE)), c(NA_real_,1)) }) test_that("fdiff produces errors for wrong input", { # wrong type: normally guaranteed by C++ expect_error(fdiff("a")); 1 expect_error(fdiff(NA_character_)); 2 expect_error(fdiff(mNAc)); 3 expect_error(fdiff(wlddev)); 4 expect_error(fdiff(mNAc, f)); 5 expect_error(fdiff(x, "1", "2")); 6 # if n*diff equals or exceeds length(x), should give error expect_error(fdiff(x,100)); 7 expect_error(fdiff(x,1,100)); 8 expect_error(fdiff(x,50,2)); 9 expect_error(fdiff(x,20,5)); 10 # if n*diff exceeds average group size, should give error # expect_warning(fdiff(x,11,1,f)); 11 -> Some fail on i386 !! # expect_warning(fdiff(x,1,11,f)); 12 # expect_warning(fdiff(x,1,11,f,t)); 13 # expect_warning(fdiff(x,11,1,f,t)); 14 # passing repeated n-values or non-positive or non-consecutive diff values should give error expect_error(fdiff(x,c(1,1))); 15 expect_error(fdiff(x,c(-1,-1))); 16 expect_error(fdiff(x,1,c(1,1))); 17 expect_error(fdiff(x,1,c(-1,-1))); 18 expect_error(fdiff(x,1,2:1)); 19 expect_error(fdiff(x,1,0)); 20 expect_error(fdiff(x,1,-1)); 21 expect_error(fdiff(x,f)); 22 # common source of error probably is passing the factor in a wrong slot expect_error(fdiff(x,1,f)); 23 expect_error(fdiff(x,c(1,1),1,f)); 24 expect_error(fdiff(x,c(1,1),1,f,t)); 25 expect_error(fdiff(x,1,c(1,1),f)); 26 expect_error(fdiff(x,1,c(1,1),f,t)); 27 expect_error(fdiff(x,1,2:1,f)); 28 expect_error(fdiff(x,1,2:1,f,t)); 29 expect_error(fdiff(x,1,0,f)); 30 expect_error(fdiff(x,1,0,f,t)); 31 expect_error(fdiff(x,1,-1,f)); 32 expect_error(fdiff(x,1,-1,f,t)); 33 # repeated values or gaps in time-variable should give error expect_error(fdiff(1:3, t = c(1,1,2))); 34 expect_error(fdiff(1:3, t = c(1,2,2))); 35 expect_error(fdiff(1:3, t = c(1,2,1))); 36 expect_error(fdiff(1:10, g = c(1,1,1,2,2,2,3,3,3,3), t = c(1,1,2,1:3,1:4))) # This is the only possible statement which does not throw a reteated timevar error because the first C++ index is 0, and omap is also initialized with 0's. expect_error(fdiff(1:10, g = c(1,1,1,2,2,2,3,3,3,3), t = c(1,1,1,1:3,1:4))); 37 expect_error(fdiff(1:10, g = c(1,1,1,2,2,2,3,3,3,3), t = c(1:3,1:3,1,1,3,4))); 38 expect_error(fdiff(1:10, g = c(1,1,1,2,2,2,3,3,3,3), t = c(1,2,2,1:3,1:4))); 39 expect_visible(fdiff(1:10, g = c(1,1,1,2,2,2,3,3,3,3), t = c(1,2,4,1:3,1:4))); 40 expect_error(fdiff(1:10, diff = 2, g = c(1,1,1,2,2,2,3,3,3,3), t = c(1,2,4,1:3,1:4))); 40 expect_error(fdiff(1:10, g = c(1,2,1,2,2,2,3,3,3,3), t = c(1:3,1:3,1:4))); 41 expect_visible(fdiff(1:10, g = c(1,1,1,2,2,2,3,3,4,3), t = c(1:3,1:3,1:4))); 42 expect_error(fdiff(1:10, diff = 2, g = c(1,1,1,2,2,2,3,3,4,3), t = c(1:3,1:3,1:4))); 42 # The usual stuff: Wrongly sized grouping vectors or time-variables expect_error(fdiff(1:3, t = 1:2)); 43 expect_error(fdiff(1:3, t = 1:4)); 44 expect_error(fdiff(1:3, g = 1:2)); 45 expect_error(fdiff(1:3, g = 1:4)); 46 expect_error(fdiff(1:4, g = c(1,1,2,2), t = c(1,2,1))); 47 expect_error(fdiff(1:4, g = c(1,2,2), t = c(1,2,1,2))); 48 }) # D test_that("D produces errors for wrong input", { # wrong type: normally guaranteed by C++ expect_error(D("a")) expect_error(D(NA_character_)) expect_error(D(mNAc)) expect_visible(D(wlddev)) expect_error(D(mNAc, f)) expect_error(D(x, "1", "2")) # if n*diff equals or exceeds length(x), should give error expect_error(D(x,100)) expect_error(D(x,1,100)) expect_error(D(x,50,2)) expect_error(D(x,20,5)) # if n*diff exceeds average group size, should give error # expect_warning(D(x,11,1,f)) -> Some fail on i386 # expect_warning(D(x,1,11,f)) # expect_warning(D(x,1,11,f,t)) # expect_warning(D(x,11,1,f,t)) # passing repeated n-values or non-positive or non-consecutive diff values should give error expect_error(D(x,c(1,1))) expect_error(D(x,c(-1,-1))) expect_error(D(x,1,c(1,1))) expect_error(D(x,1,c(-1,-1))) expect_error(D(x,1,2:1)) expect_error(D(x,1,0)) expect_error(D(x,1,-1)) expect_error(D(x,f)) # common source of error probably is passing the factor in a wrong slot expect_error(D(x,1,f)) expect_error(D(x,c(1,1),1,f)) expect_error(D(x,c(1,1),1,f,t)) expect_error(D(x,1,c(1,1),f)) expect_error(D(x,1,c(1,1),f,t)) expect_error(D(x,1,2:1,f)) expect_error(D(x,1,2:1,f,t)) expect_error(D(x,1,0,f)) expect_error(D(x,1,0,f,t)) expect_error(D(x,1,-1,f)) expect_error(D(x,1,-1,f,t)) # repeated values or gaps in time-variable should give error expect_error(D(1:3, t = c(1,1,2))) expect_error(D(1:3, t = c(1,2,2))) expect_error(D(1:3, t = c(1,2,1))) expect_error(D(1:10, g = c(1,1,1,2,2,2,3,3,3,3), t = c(1,1,2,1:3,1:4))) # This is the only possible statement which does not throw a reteated timevar error because the first C++ index is 0, and omap is also initialized with 0's. expect_error(D(1:10, g = c(1,1,1,2,2,2,3,3,3,3), t = c(1,1,1,1:3,1:4))) expect_error(D(1:10, g = c(1,1,1,2,2,2,3,3,3,3), t = c(1:3,1:3,1,1,3,4))) expect_error(D(1:10, g = c(1,1,1,2,2,2,3,3,3,3), t = c(1,2,2,1:3,1:4))) expect_visible(D(1:10, g = c(1,1,1,2,2,2,3,3,3,3), t = c(1,2,4,1:3,1:4))) expect_error(D(1:10, diff = 2, g = c(1,1,1,2,2,2,3,3,3,3), t = c(1,2,4,1:3,1:4))) expect_error(D(1:10, g = c(1,2,1,2,2,2,3,3,3,3), t = c(1:3,1:3,1:4))) expect_visible(D(1:10, g = c(1,1,1,2,2,2,3,3,4,3), t = c(1:3,1:3,1:4))) expect_error(D(1:10, diff = 2, g = c(1,1,1,2,2,2,3,3,4,3), t = c(1:3,1:3,1:4))) # The usual stuff: Wrongly sized grouping vectors or time-variables expect_error(D(1:3, t = 1:2)) expect_error(D(1:3, t = 1:4)) expect_error(D(1:3, g = 1:2)) expect_error(D(1:3, g = 1:4)) expect_error(D(1:4, g = c(1,1,2,2), t = c(1,2,1))) expect_error(D(1:4, g = c(1,2,2), t = c(1,2,1,2))) }) test_that("D.data.frame method is foolproof", { expect_visible(D(wlddev)) expect_visible(D(wlddev, by = wlddev$iso3c)) expect_error(D(wlddev, t = ~year)) expect_visible(D(wlddev, 1, 1, wlddev$iso3c)) expect_visible(D(wlddev, 1,1, ~iso3c)) expect_error(D(wlddev, 1, ~iso3c)) expect_visible(D(wlddev, 1, 1, ~iso3c + region)) expect_visible(D(wlddev, 1,1, wlddev$iso3c, wlddev$year)) expect_visible(D(wlddev, 1,1, ~iso3c, ~year)) expect_visible(D(wlddev, cols = 9:12)) expect_visible(D(wlddev, 1,1,~iso3c, cols = 9:12)) expect_visible(D(wlddev, 1,1,wlddev$iso3c, wlddev$year, cols = 9:12)) expect_visible(D(wlddev, 1,1,~iso3c, ~year, cols = 9:12)) expect_visible(D(wlddev, cols = c("PCGDP","LIFEEX"))) expect_visible(D(wlddev, 1,1,wlddev$iso3c, cols = c("PCGDP","LIFEEX"))) expect_visible(D(wlddev, 1,1,~iso3c, cols = c("PCGDP","LIFEEX"))) expect_visible(D(wlddev, 1,1,wlddev$iso3c, wlddev$year, cols = c("PCGDP","LIFEEX"))) expect_visible(D(wlddev, 1,1,~iso3c, ~year, cols = c("PCGDP","LIFEEX"))) expect_error(D(wlddev, cols = NULL)) expect_error(D(wlddev, 1,1,wlddev$iso3c, cols = NULL)) expect_error(D(wlddev, 1,1,~iso3c, cols = NULL)) expect_error(D(wlddev, 1,1,wlddev$iso3c, wlddev$year, cols = NULL)) expect_error(D(wlddev, 1,1,~iso3c, ~year, cols = NULL)) expect_error(D(wlddev, cols = 9:14)) expect_error(D(wlddev, 1,1,~iso3c, ~year, cols = 9:14)) expect_error(D(wlddev, cols = c("PCGDP","LIFEEX","bla"))) expect_error(D(wlddev, 1,1,~iso3c, ~year, cols = c("PCGDP","LIFEEX","bla"))) expect_warning(D(wlddev, w = 4)) expect_error(D(wlddev, t = "year")) expect_warning(D(wlddev, g = ~year2)) expect_error(D(wlddev, t = ~year + region)) expect_error(D(wlddev, data)) expect_error(D(wlddev, 1,1,"iso3c")) expect_error(D(wlddev, 1,1,~iso3c2)) expect_error(D(wlddev, 1,1,~iso3c + bla)) expect_error(D(wlddev, 1,1,t = rnorm(30))) expect_warning(D(wlddev, 1,1,g = rnorm(30))) expect_error(D(wlddev, 1,1,mtcars$mpg, 1:29)) expect_error(D(wlddev, 1,1,mtcars$mpg, mtcars$cyl)) # this gives a repeated values error first because length(g) == length(t) expect_error(D(wlddev,1,1, ~iso3c2, ~year2)) expect_error(D(wlddev, cols = ~bla)) expect_visible(D(wlddev, 1,1,wlddev$iso3c, ~year, cols = 9:12)) expect_visible(D(wlddev, 1,1,~iso3c, wlddev$year, cols = 9:12)) expect_error(D(wlddev, 1,1,wlddev$iso3c, ~year + bla, cols = 9:12)) expect_error(D(wlddev, 2,1,~iso3c3, ~year, cols = 9:12)) expect_error(D(wlddev, cols = c("PC3GDP","LIFEEX"))) }) # fgrowth test_that("fgrowth performs like basegrowth", { expect_equal(fgrowth(1:10), basegrowth(1:10)) expect_equal(fgrowth(1:10, 2), basegrowth(1:10, 2)) expect_equal(fgrowth(-1:1), basegrowth(-1:1)) expect_equal(fgrowth(x), basegrowth(x)) expect_equal(fgrowth(x, 2), basegrowth(x, 2)) expect_equal(fgrowth(xNA), basegrowth(xNA)) expect_equal(fgrowth(xNA, 2), basegrowth(xNA, 2)) expect_equal(qM(fgrowth(data)), setRownames(fgrowth(m), NULL)) expect_equal(fgrowth(m, stubs = FALSE), dapply(m, basegrowth)) expect_equal(fgrowth(m, 2, stubs = FALSE), dapply(m, basegrowth, 2)) expect_equal(fgrowth(mNA, stubs = FALSE), dapply(mNA, basegrowth)) expect_equal(fgrowth(mNA, 2, stubs = FALSE), dapply(mNA, basegrowth, 2)) expect_equal(fgrowth(data, stubs = FALSE), dapply(data, basegrowth)) expect_equal(fgrowth(data, 2, stubs = FALSE), dapply(data, basegrowth, 2)) expect_equal(fgrowth(dataNA, stubs = FALSE), dapply(dataNA, basegrowth)) expect_equal(fgrowth(dataNA, 2, stubs = FALSE), dapply(dataNA, basegrowth, 2)) expect_equal(fgrowth(x, 1, 1, f), BY(x, f, basegrowth, use.g.names = FALSE)) expect_equal(fgrowth(x, 2, 1, f), BY(x, f, basegrowth, 2, use.g.names = FALSE)) expect_equal(fgrowth(xNA, 1, 1, f), BY(xNA, f, basegrowth, use.g.names = FALSE)) expect_equal(fgrowth(xNA, 2, 1, f), BY(xNA, f, basegrowth, 2, use.g.names = FALSE)) expect_equal(fgrowth(m, 1, 1, g, stubs = FALSE), BY(m, g, basegrowth, use.g.names = FALSE)) expect_equal(fgrowth(m, 2, 1, g, stubs = FALSE), BY(m, g, basegrowth, 2, use.g.names = FALSE)) expect_equal(fgrowth(mNA, 1, 1, g, stubs = FALSE), BY(mNA, g, basegrowth, use.g.names = FALSE)) expect_equal(fgrowth(mNA, 2, 1, g, stubs = FALSE), BY(mNA, g, basegrowth, 2, use.g.names = FALSE)) expect_equal(fgrowth(data, 1, 1, g, stubs = FALSE), BY(data, g, basegrowth, use.g.names = FALSE)) expect_equal(fgrowth(data, 2, 1, g, stubs = FALSE), BY(data, g, basegrowth, 2, use.g.names = FALSE)) expect_equal(fgrowth(dataNA, 1, 1, g, stubs = FALSE), BY(dataNA, g, basegrowth, use.g.names = FALSE)) expect_equal(fgrowth(dataNA, 2, 1, g, stubs = FALSE), BY(dataNA, g, basegrowth, 2, use.g.names = FALSE)) # Adding time-variable: Computing fully identified panel-growtherences !! expect_equal(fgrowth(x, 1, 1, f, t), BY(x, f, basegrowth, use.g.names = FALSE)) expect_equal(fgrowth(x, 2, 1, f, t), BY(x, f, basegrowth, 2, use.g.names = FALSE)) expect_equal(fgrowth(xNA, 1, 1, f, t), BY(xNA, f, basegrowth, use.g.names = FALSE)) expect_equal(fgrowth(xNA, 2, 1, f, t), BY(xNA, f, basegrowth, 2, use.g.names = FALSE)) expect_equal(fgrowth(m, 1, 1, g, td, stubs = FALSE), BY(m, g, basegrowth, use.g.names = FALSE)) expect_equal(fgrowth(m, 2, 1, g, td, stubs = FALSE), BY(m, g, basegrowth, 2, use.g.names = FALSE)) expect_equal(fgrowth(mNA, 1, 1, g, td, stubs = FALSE), BY(mNA, g, basegrowth, use.g.names = FALSE)) expect_equal(fgrowth(mNA, 2, 1, g, td, stubs = FALSE), BY(mNA, g, basegrowth, 2, use.g.names = FALSE)) expect_equal(fgrowth(data, 1, 1, g, td, stubs = FALSE), BY(data, g, basegrowth, use.g.names = FALSE)) expect_equal(fgrowth(data, 2, 1, g, td, stubs = FALSE), BY(data, g, basegrowth, 2, use.g.names = FALSE)) expect_equal(fgrowth(dataNA, 1, 1, g, td, stubs = FALSE), BY(dataNA, g, basegrowth, use.g.names = FALSE)) expect_equal(fgrowth(dataNA, 2, 1, g, td, stubs = FALSE), BY(dataNA, g, basegrowth, 2, use.g.names = FALSE)) }) test_that("fgrowth performs lagged/leaded and iterated (panel-) vector growth reates without errors", { expect_visible(fgrowth(1:10, -2:2)) expect_visible(fgrowth(1:10, 1:2)) expect_visible(fgrowth(1:10, -1:-2)) expect_visible(fgrowth(1:10, 0)) expect_visible(fgrowth(1:10, -2:2, 2)) expect_visible(fgrowth(1:10, 1:2, 2)) expect_visible(fgrowth(1:10, -1:-2, 2)) expect_visible(fgrowth(1:10, 0, 2)) expect_visible(fgrowth(1:10, -2:2, 1:2)) expect_visible(fgrowth(1:10, 1:2, 1:2)) expect_visible(fgrowth(1:10, -1:-2, 1:2)) expect_visible(fgrowth(1:10, 0, 1:2)) expect_visible(fgrowth(xNA, -2:2)) expect_visible(fgrowth(xNA, 1:2)) expect_visible(fgrowth(xNA, -1:-2)) expect_visible(fgrowth(xNA, 0)) expect_visible(fgrowth(xNA, -2:2, 2)) expect_visible(fgrowth(xNA, 1:2, 2)) expect_visible(fgrowth(xNA, -1:-2, 2)) expect_visible(fgrowth(xNA, 0, 2)) expect_visible(fgrowth(xNA, -2:2, 1:2)) expect_visible(fgrowth(xNA, 1:2, 1:2)) expect_visible(fgrowth(xNA, -1:-2, 1:2)) expect_visible(fgrowth(xNA, 0, 1:2)) expect_visible(fgrowth(xNA, -2:2, 1, f)) expect_visible(fgrowth(xNA, 1:2, 1, f)) expect_visible(fgrowth(xNA, -1:-2, 1, f)) expect_visible(fgrowth(xNA, 0, 1, f)) expect_visible(fgrowth(xNA, -2:2, 2, f)) expect_visible(fgrowth(xNA, 1:2, 2, f)) expect_visible(fgrowth(xNA, -1:-2, 2, f)) expect_visible(fgrowth(xNA, 0, 2, f)) expect_visible(fgrowth(xNA, -2:2, 1:2, f)) expect_visible(fgrowth(xNA, 1:2, 1:2, f)) expect_visible(fgrowth(xNA, -1:-2, 1:2, f)) expect_visible(fgrowth(xNA, 0, 1:2, f)) expect_visible(fgrowth(xNA, -2:2, 1, f, t)) expect_visible(fgrowth(xNA, 1:2, 1, f, t)) expect_visible(fgrowth(xNA, -1:-2, 1, f, t)) expect_visible(fgrowth(xNA, 0, 1, f, t)) expect_visible(fgrowth(xNA, -2:2, 2, f, t)) expect_visible(fgrowth(xNA, 1:2, 2, f, t)) expect_visible(fgrowth(xNA, -1:-2, 2, f, t)) expect_visible(fgrowth(xNA, 0, 2, f, t)) expect_visible(fgrowth(xNA, -2:2, 1:2, f, t)) expect_visible(fgrowth(xNA, 1:2, 1:2, f, t)) expect_visible(fgrowth(xNA, -1:-2, 1:2, f, t)) expect_visible(fgrowth(xNA, 0, 1:2, f, t)) }) test_that("fgrowth performs lagged/leaded and iterated (panel-) matrix growth rates without errors", { expect_visible(fgrowth(m, -2:2)) expect_visible(fgrowth(m, 1:2)) expect_visible(fgrowth(m, -1:-2)) expect_visible(fgrowth(m, 0)) expect_visible(fgrowth(m, -2:2, 2)) expect_visible(fgrowth(m, 1:2, 2)) expect_visible(fgrowth(m, -1:-2, 2)) expect_visible(fgrowth(m, 0, 2)) expect_visible(fgrowth(m, -2:2, 1:2)) expect_visible(fgrowth(m, 1:2, 1:2)) expect_visible(fgrowth(m, -1:-2, 1:2)) expect_visible(fgrowth(m, 0, 1:2)) expect_visible(fgrowth(m, -2:2, 1, g)) expect_visible(fgrowth(m, 1:2, 1, g)) expect_visible(fgrowth(m, -1:-2, 1, g)) expect_visible(fgrowth(m, 0, 1, g)) expect_visible(fgrowth(m, -2:2, 2, g)) expect_visible(fgrowth(m, 1:2, 2, g)) expect_visible(fgrowth(m, -1:-2, 2, g)) expect_visible(fgrowth(m, 0, 2, g)) expect_visible(fgrowth(m, -2:2, 1:2, g)) expect_visible(fgrowth(m, 1:2, 1:2, g)) expect_visible(fgrowth(m, -1:-2, 1:2, g)) expect_visible(fgrowth(m, 0, 1:2, g)) expect_visible(fgrowth(m, -2:2, 1, g, td)) expect_visible(fgrowth(m, 1:2, 1, g, td)) expect_visible(fgrowth(m, -1:-2, 1, g, td)) expect_visible(fgrowth(m, 0, 1, g, td)) expect_visible(fgrowth(m, -2:2, 2, g, td)) expect_visible(fgrowth(m, 1:2, 2, g, td)) expect_visible(fgrowth(m, -1:-2, 2, g, td)) expect_visible(fgrowth(m, 0, 2, g, td)) expect_visible(fgrowth(m, -2:2, 1:2, g, td)) expect_visible(fgrowth(m, 1:2, 1:2, g, td)) expect_visible(fgrowth(m, -1:-2, 1:2, g, td)) expect_visible(fgrowth(m, 0, 1:2, g, td)) }) test_that("fgrowth performs lagged/leaded and iterated (panel-) data.frame growth rates without errors", { expect_visible(fgrowth(data, -2:2)) expect_visible(fgrowth(data, 1:2)) expect_visible(fgrowth(data, -1:-2)) expect_visible(fgrowth(data, 0)) expect_visible(fgrowth(data, -2:2, 2)) expect_visible(fgrowth(data, 1:2, 2)) expect_visible(fgrowth(data, -1:-2, 2)) expect_visible(fgrowth(data, 0, 2)) expect_visible(fgrowth(data, -2:2, 1:2)) expect_visible(fgrowth(data, 1:2, 1:2)) expect_visible(fgrowth(data, -1:-2, 1:2)) expect_visible(fgrowth(data, 0, 1:2)) expect_visible(fgrowth(data, -2:2, 1, g)) expect_visible(fgrowth(data, 1:2, 1, g)) expect_visible(fgrowth(data, -1:-2, 1, g)) expect_visible(fgrowth(data, 0, 1, g)) expect_visible(fgrowth(data, -2:2, 2, g)) expect_visible(fgrowth(data, 1:2, 2, g)) expect_visible(fgrowth(data, -1:-2, 2, g)) expect_visible(fgrowth(data, 0, 2, g)) expect_visible(fgrowth(data, -2:2, 1:2, g)) expect_visible(fgrowth(data, 1:2, 1:2, g)) expect_visible(fgrowth(data, -1:-2, 1:2, g)) expect_visible(fgrowth(data, 0, 1:2, g)) expect_visible(fgrowth(data, -2:2, 1, g, td)) expect_visible(fgrowth(data, 1:2, 1, g, td)) expect_visible(fgrowth(data, -1:-2, 1, g, td)) expect_visible(fgrowth(data, 0, 1, g, td)) expect_visible(fgrowth(data, -2:2, 2, g, td)) expect_visible(fgrowth(data, 1:2, 2, g, td)) expect_visible(fgrowth(data, -1:-2, 2, g, td)) expect_visible(fgrowth(data, 0, 2, g, td)) expect_visible(fgrowth(data, -2:2, 1:2, g, td)) expect_visible(fgrowth(data, 1:2, 1:2, g, td)) expect_visible(fgrowth(data, -1:-2, 1:2, g, td)) expect_visible(fgrowth(data, 0, 1:2, g, td)) }) test_that("fgrowth correctly handles unordered time-series and panel-series computations", { expect_equal(fgrowth(x, -2:2, 1:2, t = 1:100), fgrowth(x, -2:2, 1:2)) expect_equal(fgrowth(xNA, -2:2, 1:2, t = 1:100), fgrowth(xNA, -2:2, 1:2)) expect_equal(fgrowth(m, -2:2, 1:2, t = seq_along(td)), fgrowth(m, -2:2, 1:2)) expect_equal(fgrowth(data, -2:2, 1:2, t = seq_along(td)), fgrowth(data, -2:2, 1:2)) expect_equal(fgrowth(xuo, -2:2, 1:2, t = t2uo)[o,], unclass(fgrowth(x, -2:2, 1:2))) expect_equal(fgrowth(xNAuo, -2:2, 1:2, t = t2uo)[o,], unclass(fgrowth(xNA, -2:2, 1:2))) expect_equal(fgrowth(muo, -2:2, 1:2, t = t2duo)[od,], unclass(fgrowth(m, -2:2, 1:2))) expect_equal(fgrowth(datauo, -2:2, 1:2, t = t2duo)[od,], fgrowth(data, -2:2, 1:2)) expect_equal(fgrowth(xuo, -2:2, 1:2, fuo, tuo)[o,], unclass(fgrowth(x, -2:2, 1:2, f, t))) expect_equal(fgrowth(xNAuo, -2:2, 1:2, fuo, tuo)[o,], unclass(fgrowth(xNA, -2:2, 1:2, f, t))) expect_equal(fgrowth(muo, -2:2, 1:2, guo, tduo)[od,], unclass(fgrowth(m, -2:2, 1:2, g, td))) expect_equal(fgrowth(datauo, -2:2, 1:2, guo, tduo)[od,], fgrowth(data, -2:2, 1:2, g, td)) }) test_that("fgrowth performs numerically stable in ordered computations", { expect_true(all_obj_equal(replicate(50, fgrowth(x), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fgrowth(xNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fgrowth(m), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fgrowth(mNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fgrowth(data), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fgrowth(dataNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fgrowth(x, 1, 1, f), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fgrowth(x, -2:2, 1:2, f), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fgrowth(xNA, 1, 1, f), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fgrowth(xNA, -2:2, 1:2, f), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fgrowth(m, 1, 1, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fgrowth(m, -2:2, 1:2, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fgrowth(mNA, 1, 1, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fgrowth(mNA, -2:2, 1:2, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fgrowth(data, 1, 1, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fgrowth(data, -2:2, 1:2, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fgrowth(dataNA, 1, 1, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fgrowth(dataNA, -2:2, 1:2, g), simplify = FALSE))) }) test_that("fgrowth performs numerically stable in unordered computations", { expect_true(all_obj_equal(replicate(50, fgrowth(xuo, t = t2uo), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fgrowth(xNAuo, t = t2uo), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fgrowth(muo, t = t2duo), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fgrowth(datauo, t = t2duo), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fgrowth(xuo, 1, 1, fuo, tuo), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fgrowth(xuo, -2:2, 1:2, fuo, tuo), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fgrowth(muo, 1, 1, guo, tduo), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fgrowth(muo, -2:2, 1:2, guo, tduo), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fgrowth(datauo, 1, 1, guo, tduo), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fgrowth(datauo, -2:2, 1:2, guo, tduo), simplify = FALSE))) }) test_that("fgrowth handles special values in the right way", { expect_equal(fgrowth(c(1,NA)), c(NA_real_,NA_real_)) expect_equal(fgrowth(c(NA,1)), c(NA_real_,NA_real_)) expect_equal(fgrowth(c(NaN,1)), c(NA_real_,NaN)) expect_equal(fgrowth(c(1,NaN)), c(NA_real_,NaN)) expect_equal(fgrowth(c(Inf,1)), c(NA,NaN)) expect_equal(fgrowth(c(1,Inf)), c(NA,Inf)) expect_equal(fgrowth(c(Inf,NA)), c(NA_real_,NA_real_)) expect_equal(fgrowth(c(NA,Inf)), c(NA_real_,NA_real_)) expect_equal(fgrowth(c(Inf,-Inf)), c(NA,NaN)) expect_equal(fgrowth(c(-Inf,Inf)), c(NA,NaN)) expect_equal(fgrowth(c(Inf,Inf)), c(NA,NaN)) expect_equal(fgrowth(c(TRUE,TRUE)), c(NA_real_,0)) expect_equal(fgrowth(c(TRUE,FALSE)), c(NA_real_,-100)) expect_equal(fgrowth(c(FALSE,TRUE)), c(NA_real_,Inf)) }) test_that("fgrowth produces errors for wrong input", { # wrong type: normally guaranteed by C++ expect_error(fgrowth("a")) expect_error(fgrowth(NA_character_)) expect_error(fgrowth(mNAc)) expect_error(fgrowth(wlddev)) expect_error(fgrowth(mNAc, f)) expect_error(fgrowth(x, "1", "2")) # if n*growth equals or exceeds length(x), should give error expect_error(fgrowth(x,100)) expect_error(fgrowth(x,1,100)) expect_error(fgrowth(x,50,2)) expect_error(fgrowth(x,20,5)) # if n*growth exceeds average group size, should give error # expect_warning(fgrowth(x,11,1,f)) -> some fail on i386 # expect_warning(fgrowth(x,1,11,f)) # expect_warning(fgrowth(x,1,11,f,t)) # expect_warning(fgrowth(x,11,1,f,t)) # passing repeated n-values or non-positive or non-consecutive growth values should give error expect_error(fgrowth(x,c(1,1))) expect_error(fgrowth(x,c(-1,-1))) expect_error(fgrowth(x,1,c(1,1))) expect_error(fgrowth(x,1,c(-1,-1))) expect_error(fgrowth(x,1,2:1)) expect_error(fgrowth(x,1,0)) expect_error(fgrowth(x,1,-1)) expect_error(fgrowth(x,f)) # common source of error probably is passing the factor in a wrong slot expect_error(fgrowth(x,1,f)) expect_error(fgrowth(x,c(1,1),1,f)) expect_error(fgrowth(x,c(1,1),1,f,t)) expect_error(fgrowth(x,1,c(1,1),f)) expect_error(fgrowth(x,1,c(1,1),f,t)) expect_error(fgrowth(x,1,2:1,f)) expect_error(fgrowth(x,1,2:1,f,t)) expect_error(fgrowth(x,1,0,f)) expect_error(fgrowth(x,1,0,f,t)) expect_error(fgrowth(x,1,-1,f)) expect_error(fgrowth(x,1,-1,f,t)) # repeated values or gaps in time-variable should give error expect_error(fgrowth(1:3, t = c(1,1,2))) expect_error(fgrowth(1:3, t = c(1,2,2))) expect_error(fgrowth(1:3, t = c(1,2,1))) expect_error(fgrowth(1:10, g = c(1,1,1,2,2,2,3,3,3,3), t = c(1,1,2,1:3,1:4))) # This is the only possible statement which does not throw a reteated timevar error because the first C++ index is 0, and omap is also initialized with 0's. expect_error(fgrowth(1:10, g = c(1,1,1,2,2,2,3,3,3,3), t = c(1,1,1,1:3,1:4))) expect_error(fgrowth(1:10, g = c(1,1,1,2,2,2,3,3,3,3), t = c(1:3,1:3,1,1,3,4))) expect_error(fgrowth(1:10, g = c(1,1,1,2,2,2,3,3,3,3), t = c(1,2,2,1:3,1:4))) expect_visible(fgrowth(1:10, g = c(1,1,1,2,2,2,3,3,3,3), t = c(1,2,4,1:3,1:4))) expect_error(fgrowth(1:10, diff = 2, g = c(1,1,1,2,2,2,3,3,3,3), t = c(1,2,4,1:3,1:4))) expect_error(fgrowth(1:10, g = c(1,2,1,2,2,2,3,3,3,3), t = c(1:3,1:3,1:4))) expect_visible(fgrowth(1:10, g = c(1,1,1,2,2,2,3,3,4,3), t = c(1:3,1:3,1:4))) expect_error(fgrowth(1:10, diff = 2, g = c(1,1,1,2,2,2,3,3,4,3), t = c(1:3,1:3,1:4))) # The usual stuff: Wrongly sized grouping vectors or time-variables expect_error(fgrowth(1:3, t = 1:2)) expect_error(fgrowth(1:3, t = 1:4)) expect_error(fgrowth(1:3, g = 1:2)) expect_error(fgrowth(1:3, g = 1:4)) expect_error(fgrowth(1:4, g = c(1,1,2,2), t = c(1,2,1))) expect_error(fgrowth(1:4, g = c(1,2,2), t = c(1,2,1,2))) }) # G test_that("G produces errors for wrong input", { # wrong type: normally guaranteed by C++ expect_error(G("a")) expect_error(G(NA_character_)) expect_error(G(mNAc)) expect_visible(G(wlddev)) expect_error(G(mNAc, f)) expect_error(G(x, "1", "2")) # if n*diff equals or exceeds length(x), should give error expect_error(G(x,100)) expect_error(G(x,1,100)) expect_error(G(x,50,2)) expect_error(G(x,20,5)) # if n*diff exceeds average group size, should give error # expect_warning(G(x,11,1,f)) -> Some fail on i386 # expect_warning(G(x,1,11,f)) # expect_warning(G(x,1,11,f,t)) # expect_warning(G(x,11,1,f,t)) # passing repeated n-values or non-positive or non-consecutive diff values should give error expect_error(G(x,c(1,1))) expect_error(G(x,c(-1,-1))) expect_error(G(x,1,c(1,1))) expect_error(G(x,1,c(-1,-1))) expect_error(G(x,1,2:1)) expect_error(G(x,1,0)) expect_error(G(x,1,-1)) expect_error(G(x,f)) # common source of error probably is passing the factor in a wrong slot expect_error(G(x,1,f)) expect_error(G(x,c(1,1),1,f)) expect_error(G(x,c(1,1),1,f,t)) expect_error(G(x,1,c(1,1),f)) expect_error(G(x,1,c(1,1),f,t)) expect_error(G(x,1,2:1,f)) expect_error(G(x,1,2:1,f,t)) expect_error(G(x,1,0,f)) expect_error(G(x,1,0,f,t)) expect_error(G(x,1,-1,f)) expect_error(G(x,1,-1,f,t)) # repeated values or gaps in time-variable should give error expect_error(G(1:3, t = c(1,1,2))) expect_error(G(1:3, t = c(1,2,2))) expect_error(G(1:3, t = c(1,2,1))) expect_error(G(1:10, g = c(1,1,1,2,2,2,3,3,3,3), t = c(1,1,2,1:3,1:4))) # This is the only possible statement which does not throw a reteated timevar error because the first C++ index is 0, and omap is also initialized with 0's. expect_error(G(1:10, g = c(1,1,1,2,2,2,3,3,3,3), t = c(1,1,1,1:3,1:4))) expect_error(G(1:10, g = c(1,1,1,2,2,2,3,3,3,3), t = c(1:3,1:3,1,1,3,4))) expect_error(G(1:10, g = c(1,1,1,2,2,2,3,3,3,3), t = c(1,2,2,1:3,1:4))) expect_visible(G(1:10, g = c(1,1,1,2,2,2,3,3,3,3), t = c(1,2,4,1:3,1:4))) expect_error(G(1:10, diff = 2, g = c(1,1,1,2,2,2,3,3,3,3), t = c(1,2,4,1:3,1:4))) expect_error(G(1:10, g = c(1,2,1,2,2,2,3,3,3,3), t = c(1:3,1:3,1:4))) expect_visible(G(1:10, g = c(1,1,1,2,2,2,3,3,4,3), t = c(1:3,1:3,1:4))) expect_error(G(1:10, diff = 2, g = c(1,1,1,2,2,2,3,3,4,3), t = c(1:3,1:3,1:4))) # The usual stuff: Wrongly sized grouping vectors or time-variables expect_error(G(1:3, t = 1:2)) expect_error(G(1:3, t = 1:4)) expect_error(G(1:3, g = 1:2)) expect_error(G(1:3, g = 1:4)) expect_error(G(1:4, g = c(1,1,2,2), t = c(1,2,1))) expect_error(G(1:4, g = c(1,2,2), t = c(1,2,1,2))) }) test_that("G.data.frame method is foolproof", { expect_visible(G(wlddev)) expect_visible(G(wlddev, by = wlddev$iso3c)) expect_error(G(wlddev, t = ~year)) expect_visible(G(wlddev, 1, 1, wlddev$iso3c)) expect_visible(G(wlddev, 1,1, ~iso3c)) expect_error(G(wlddev, 1, ~iso3c)) expect_visible(G(wlddev, 1, 1, ~iso3c + region)) expect_visible(G(wlddev, 1,1, wlddev$iso3c, wlddev$year)) expect_visible(G(wlddev, 1,1, ~iso3c, ~year)) expect_visible(G(wlddev, cols = 9:12)) expect_visible(G(wlddev, 1,1,~iso3c, cols = 9:12)) expect_visible(G(wlddev, 1,1,wlddev$iso3c, wlddev$year, cols = 9:12)) expect_visible(G(wlddev, 1,1,~iso3c, ~year, cols = 9:12)) expect_visible(G(wlddev, cols = c("PCGDP","LIFEEX"))) expect_visible(G(wlddev, 1,1,wlddev$iso3c, cols = c("PCGDP","LIFEEX"))) expect_visible(G(wlddev, 1,1,~iso3c, cols = c("PCGDP","LIFEEX"))) expect_visible(G(wlddev, 1,1,wlddev$iso3c, wlddev$year, cols = c("PCGDP","LIFEEX"))) expect_visible(G(wlddev, 1,1,~iso3c, ~year, cols = c("PCGDP","LIFEEX"))) expect_error(G(wlddev, cols = NULL)) expect_error(G(wlddev, 1,1,wlddev$iso3c, cols = NULL)) expect_error(G(wlddev, 1,1,~iso3c, cols = NULL)) expect_error(G(wlddev, 1,1,wlddev$iso3c, wlddev$year, cols = NULL)) expect_error(G(wlddev, 1,1,~iso3c, ~year, cols = NULL)) expect_error(G(wlddev, cols = 9:14)) expect_error(G(wlddev, 1,1,~iso3c, ~year, cols = 9:14)) expect_error(G(wlddev, cols = c("PCGDP","LIFEEX","bla"))) expect_error(G(wlddev, 1,1,~iso3c, ~year, cols = c("PCGDP","LIFEEX","bla"))) expect_warning(G(wlddev, w = 4)) expect_error(G(wlddev, t = "year")) expect_warning(G(wlddev, g = ~year2)) expect_error(G(wlddev, t = ~year + region)) expect_error(G(wlddev, data)) expect_error(G(wlddev, 1,1,"iso3c")) expect_error(G(wlddev, 1,1,~iso3c2)) expect_error(G(wlddev, 1,1,~iso3c + bla)) expect_error(G(wlddev, 1,1,t = rnorm(30))) expect_warning(G(wlddev, 1,1,g = rnorm(30))) expect_error(G(wlddev, 1,1,mtcars$mpg, 1:29)) expect_error(G(wlddev, 1,1,mtcars$mpg, mtcars$cyl)) # this gives a repeated values error first because length(g) == length(t) expect_error(G(wlddev,1,1, ~iso3c2, ~year2)) expect_error(G(wlddev, cols = ~bla)) expect_visible(G(wlddev, 1,1,wlddev$iso3c, ~year, cols = 9:12)) expect_visible(G(wlddev, 1,1,~iso3c, wlddev$year, cols = 9:12)) expect_error(G(wlddev, 1,1,wlddev$iso3c, ~year + bla, cols = 9:12)) expect_error(G(wlddev, 2,1,~iso3c3, ~year, cols = 9:12)) expect_error(G(wlddev, cols = c("PC3GDP","LIFEEX"))) }) # fgrowth with logdiff option: test_that("fgrowth with logdiff performs like baselogdiff", { expect_equal(fgrowth(1:10, logdiff = TRUE), baselogdiff(1:10)) expect_equal(fgrowth(1:10, 2, logdiff = TRUE), baselogdiff(1:10, 2)) # expect_equal(fgrowth(-1:1, logdiff = TRUE), suppressWarnings(baselogdiff(-1:1))) # NaN -Inf mismatch expect_equal(fgrowth(x, logdiff = TRUE), baselogdiff(x)) expect_equal(fgrowth(x, 2, logdiff = TRUE), baselogdiff(x, 2)) expect_equal(fgrowth(xNA, logdiff = TRUE), baselogdiff(xNA)) expect_equal(fgrowth(xNA, 2, logdiff = TRUE), baselogdiff(xNA, 2)) expect_equal(qM(fgrowth(data, logdiff = TRUE)), setRownames(fgrowth(m, logdiff = TRUE), NULL)) expect_equal(fgrowth(m, stubs = FALSE, logdiff = TRUE), dapply(m, baselogdiff)) expect_equal(fgrowth(m, 2, stubs = FALSE, logdiff = TRUE), dapply(m, baselogdiff, 2)) expect_equal(fgrowth(mNA, stubs = FALSE, logdiff = TRUE), dapply(mNA, baselogdiff)) expect_equal(fgrowth(mNA, 2, stubs = FALSE, logdiff = TRUE), dapply(mNA, baselogdiff, 2)) expect_equal(fgrowth(x, 1, 1, f, logdiff = TRUE), BY(x, f, baselogdiff, use.g.names = FALSE)) expect_equal(fgrowth(x, 2, 1, f, logdiff = TRUE), BY(x, f, baselogdiff, 2, use.g.names = FALSE)) expect_equal(fgrowth(xNA, 1, 1, f, logdiff = TRUE), BY(xNA, f, baselogdiff, use.g.names = FALSE)) expect_equal(fgrowth(xNA, 2, 1, f, logdiff = TRUE), BY(xNA, f, baselogdiff, 2, use.g.names = FALSE)) expect_equal(fgrowth(m, 1, 1, g, stubs = FALSE, logdiff = TRUE), BY(m, g, baselogdiff, use.g.names = FALSE)) expect_equal(fgrowth(m, 2, 1, g, stubs = FALSE, logdiff = TRUE), BY(m, g, baselogdiff, 2, use.g.names = FALSE)) expect_equal(fgrowth(mNA, 1, 1, g, stubs = FALSE, logdiff = TRUE), BY(mNA, g, baselogdiff, use.g.names = FALSE)) expect_equal(fgrowth(mNA, 2, 1, g, stubs = FALSE, logdiff = TRUE), BY(mNA, g, baselogdiff, 2, use.g.names = FALSE)) expect_equal(fgrowth(data, 1, 1, g, stubs = FALSE, logdiff = TRUE), BY(data, g, baselogdiff, use.g.names = FALSE)) expect_equal(fgrowth(data, 2, 1, g, stubs = FALSE, logdiff = TRUE), BY(data, g, baselogdiff, 2, use.g.names = FALSE)) expect_equal(fgrowth(dataNA, 1, 1, g, stubs = FALSE, logdiff = TRUE), BY(dataNA, g, baselogdiff, use.g.names = FALSE)) expect_equal(fgrowth(dataNA, 2, 1, g, stubs = FALSE, logdiff = TRUE), BY(dataNA, g, baselogdiff, 2, use.g.names = FALSE)) # Adding time-variable: Computing fully identified panel-growtherences !! expect_equal(fgrowth(x, 1, 1, f, t, logdiff = TRUE), BY(x, f, baselogdiff, use.g.names = FALSE)) expect_equal(fgrowth(x, 2, 1, f, t, logdiff = TRUE), BY(x, f, baselogdiff, 2, use.g.names = FALSE)) expect_equal(fgrowth(xNA, 1, 1, f, t, logdiff = TRUE), BY(xNA, f, baselogdiff, use.g.names = FALSE)) expect_equal(fgrowth(xNA, 2, 1, f, t, logdiff = TRUE), BY(xNA, f, baselogdiff, 2, use.g.names = FALSE)) expect_equal(fgrowth(m, 1, 1, g, td, stubs = FALSE, logdiff = TRUE), BY(m, g, baselogdiff, use.g.names = FALSE)) expect_equal(fgrowth(m, 2, 1, g, td, stubs = FALSE, logdiff = TRUE), BY(m, g, baselogdiff, 2, use.g.names = FALSE)) expect_equal(fgrowth(mNA, 1, 1, g, td, stubs = FALSE, logdiff = TRUE), BY(mNA, g, baselogdiff, use.g.names = FALSE)) expect_equal(fgrowth(mNA, 2, 1, g, td, stubs = FALSE, logdiff = TRUE), BY(mNA, g, baselogdiff, 2, use.g.names = FALSE)) expect_equal(fgrowth(data, 1, 1, g, td, stubs = FALSE, logdiff = TRUE), BY(data, g, baselogdiff, use.g.names = FALSE)) expect_equal(fgrowth(data, 2, 1, g, td, stubs = FALSE, logdiff = TRUE), BY(data, g, baselogdiff, 2, use.g.names = FALSE)) expect_equal(fgrowth(dataNA, 1, 1, g, td, stubs = FALSE, logdiff = TRUE), BY(dataNA, g, baselogdiff, use.g.names = FALSE)) expect_equal(fgrowth(dataNA, 2, 1, g, td, stubs = FALSE, logdiff = TRUE), BY(dataNA, g, baselogdiff, 2, use.g.names = FALSE)) }) test_that("fgrowth with logdiff performs lagged/leaded and iterated (panel-) vector growth reates without errors", { expect_visible(fgrowth(1:10, -2:2, logdiff = TRUE)) expect_visible(fgrowth(1:10, 1:2, logdiff = TRUE)) expect_visible(fgrowth(1:10, -1:-2, logdiff = TRUE)) expect_visible(fgrowth(1:10, 0, logdiff = TRUE)) expect_visible(fgrowth(1:10, -2:2, 2, logdiff = TRUE)) expect_visible(fgrowth(1:10, 1:2, 2, logdiff = TRUE)) expect_visible(fgrowth(1:10, -1:-2, 2, logdiff = TRUE)) expect_visible(fgrowth(1:10, 0, 2, logdiff = TRUE)) expect_visible(fgrowth(1:10, -2:2, 1:2, logdiff = TRUE)) expect_visible(fgrowth(1:10, 1:2, 1:2, logdiff = TRUE)) expect_visible(fgrowth(1:10, -1:-2, 1:2, logdiff = TRUE)) expect_visible(fgrowth(1:10, 0, 1:2, logdiff = TRUE)) expect_visible(fgrowth(xNA, -2:2, logdiff = TRUE)) expect_visible(fgrowth(xNA, 1:2, logdiff = TRUE)) expect_visible(fgrowth(xNA, -1:-2, logdiff = TRUE)) expect_visible(fgrowth(xNA, 0, logdiff = TRUE)) expect_visible(fgrowth(xNA, -2:2, 2, logdiff = TRUE)) expect_visible(fgrowth(xNA, 1:2, 2, logdiff = TRUE)) expect_visible(fgrowth(xNA, -1:-2, 2, logdiff = TRUE)) expect_visible(fgrowth(xNA, 0, 2, logdiff = TRUE)) expect_visible(fgrowth(xNA, -2:2, 1:2, logdiff = TRUE)) expect_visible(fgrowth(xNA, 1:2, 1:2, logdiff = TRUE)) expect_visible(fgrowth(xNA, -1:-2, 1:2, logdiff = TRUE)) expect_visible(fgrowth(xNA, 0, 1:2, logdiff = TRUE)) expect_visible(fgrowth(xNA, -2:2, 1, f, logdiff = TRUE)) expect_visible(fgrowth(xNA, 1:2, 1, f, logdiff = TRUE)) expect_visible(fgrowth(xNA, -1:-2, 1, f, logdiff = TRUE)) expect_visible(fgrowth(xNA, 0, 1, f, logdiff = TRUE)) expect_visible(fgrowth(xNA, -2:2, 2, f, logdiff = TRUE)) expect_visible(fgrowth(xNA, 1:2, 2, f, logdiff = TRUE)) expect_visible(fgrowth(xNA, -1:-2, 2, f, logdiff = TRUE)) expect_visible(fgrowth(xNA, 0, 2, f, logdiff = TRUE)) expect_visible(fgrowth(xNA, -2:2, 1:2, f, logdiff = TRUE)) expect_visible(fgrowth(xNA, 1:2, 1:2, f, logdiff = TRUE)) expect_visible(fgrowth(xNA, -1:-2, 1:2, f, logdiff = TRUE)) expect_visible(fgrowth(xNA, 0, 1:2, f, logdiff = TRUE)) expect_visible(fgrowth(xNA, -2:2, 1, f, t, logdiff = TRUE)) expect_visible(fgrowth(xNA, 1:2, 1, f, t, logdiff = TRUE)) expect_visible(fgrowth(xNA, -1:-2, 1, f, t, logdiff = TRUE)) expect_visible(fgrowth(xNA, 0, 1, f, t, logdiff = TRUE)) expect_visible(fgrowth(xNA, -2:2, 2, f, t, logdiff = TRUE)) expect_visible(fgrowth(xNA, 1:2, 2, f, t, logdiff = TRUE)) expect_visible(fgrowth(xNA, -1:-2, 2, f, t, logdiff = TRUE)) expect_visible(fgrowth(xNA, 0, 2, f, t, logdiff = TRUE)) expect_visible(fgrowth(xNA, -2:2, 1:2, f, t, logdiff = TRUE)) expect_visible(fgrowth(xNA, 1:2, 1:2, f, t, logdiff = TRUE)) expect_visible(fgrowth(xNA, -1:-2, 1:2, f, t, logdiff = TRUE)) expect_visible(fgrowth(xNA, 0, 1:2, f, t, logdiff = TRUE)) }) test_that("fgrowth with logdiff performs lagged/leaded and iterated (panel-) matrix growth rates without errors", { expect_visible(fgrowth(m, -2:2, logdiff = TRUE)) expect_visible(fgrowth(m, 1:2, logdiff = TRUE)) expect_visible(fgrowth(m, -1:-2, logdiff = TRUE)) expect_visible(fgrowth(m, 0, logdiff = TRUE)) expect_visible(fgrowth(m, -2:2, 2, logdiff = TRUE)) expect_visible(fgrowth(m, 1:2, 2, logdiff = TRUE)) expect_visible(fgrowth(m, -1:-2, 2, logdiff = TRUE)) expect_visible(fgrowth(m, 0, 2, logdiff = TRUE)) expect_visible(fgrowth(m, -2:2, 1:2, logdiff = TRUE)) expect_visible(fgrowth(m, 1:2, 1:2, logdiff = TRUE)) expect_visible(fgrowth(m, -1:-2, 1:2, logdiff = TRUE)) expect_visible(fgrowth(m, 0, 1:2, logdiff = TRUE)) expect_visible(fgrowth(m, -2:2, 1, g, logdiff = TRUE)) expect_visible(fgrowth(m, 1:2, 1, g, logdiff = TRUE)) expect_visible(fgrowth(m, -1:-2, 1, g, logdiff = TRUE)) expect_visible(fgrowth(m, 0, 1, g, logdiff = TRUE)) expect_visible(fgrowth(m, -2:2, 2, g, logdiff = TRUE)) expect_visible(fgrowth(m, 1:2, 2, g, logdiff = TRUE)) expect_visible(fgrowth(m, -1:-2, 2, g, logdiff = TRUE)) expect_visible(fgrowth(m, 0, 2, g, logdiff = TRUE)) expect_visible(fgrowth(m, -2:2, 1:2, g, logdiff = TRUE)) expect_visible(fgrowth(m, 1:2, 1:2, g, logdiff = TRUE)) expect_visible(fgrowth(m, -1:-2, 1:2, g, logdiff = TRUE)) expect_visible(fgrowth(m, 0, 1:2, g, logdiff = TRUE)) expect_visible(fgrowth(m, -2:2, 1, g, td, logdiff = TRUE)) expect_visible(fgrowth(m, 1:2, 1, g, td, logdiff = TRUE)) expect_visible(fgrowth(m, -1:-2, 1, g, td, logdiff = TRUE)) expect_visible(fgrowth(m, 0, 1, g, td, logdiff = TRUE)) expect_visible(fgrowth(m, -2:2, 2, g, td, logdiff = TRUE)) expect_visible(fgrowth(m, 1:2, 2, g, td, logdiff = TRUE)) expect_visible(fgrowth(m, -1:-2, 2, g, td, logdiff = TRUE)) expect_visible(fgrowth(m, 0, 2, g, td, logdiff = TRUE)) expect_visible(fgrowth(m, -2:2, 1:2, g, td, logdiff = TRUE)) expect_visible(fgrowth(m, 1:2, 1:2, g, td, logdiff = TRUE)) expect_visible(fgrowth(m, -1:-2, 1:2, g, td, logdiff = TRUE)) expect_visible(fgrowth(m, 0, 1:2, g, td, logdiff = TRUE)) }) test_that("fgrowth with logdiff performs lagged/leaded and iterated (panel-) data.frame growth rates without errors", { expect_visible(fgrowth(data, -2:2, logdiff = TRUE)) expect_visible(fgrowth(data, 1:2, logdiff = TRUE)) expect_visible(fgrowth(data, -1:-2, logdiff = TRUE)) expect_visible(fgrowth(data, 0, logdiff = TRUE)) expect_visible(fgrowth(data, -2:2, 2, logdiff = TRUE)) expect_visible(fgrowth(data, 1:2, 2, logdiff = TRUE)) expect_visible(fgrowth(data, -1:-2, 2, logdiff = TRUE)) expect_visible(fgrowth(data, 0, 2, logdiff = TRUE)) expect_visible(fgrowth(data, -2:2, 1:2, logdiff = TRUE)) expect_visible(fgrowth(data, 1:2, 1:2, logdiff = TRUE)) expect_visible(fgrowth(data, -1:-2, 1:2, logdiff = TRUE)) expect_visible(fgrowth(data, 0, 1:2, logdiff = TRUE)) expect_visible(fgrowth(data, -2:2, 1, g, logdiff = TRUE)) expect_visible(fgrowth(data, 1:2, 1, g, logdiff = TRUE)) expect_visible(fgrowth(data, -1:-2, 1, g, logdiff = TRUE)) expect_visible(fgrowth(data, 0, 1, g, logdiff = TRUE)) expect_visible(fgrowth(data, -2:2, 2, g, logdiff = TRUE)) expect_visible(fgrowth(data, 1:2, 2, g, logdiff = TRUE)) expect_visible(fgrowth(data, -1:-2, 2, g, logdiff = TRUE)) expect_visible(fgrowth(data, 0, 2, g, logdiff = TRUE)) expect_visible(fgrowth(data, -2:2, 1:2, g, logdiff = TRUE)) expect_visible(fgrowth(data, 1:2, 1:2, g, logdiff = TRUE)) expect_visible(fgrowth(data, -1:-2, 1:2, g, logdiff = TRUE)) expect_visible(fgrowth(data, 0, 1:2, g, logdiff = TRUE)) expect_visible(fgrowth(data, -2:2, 1, g, td, logdiff = TRUE)) expect_visible(fgrowth(data, 1:2, 1, g, td, logdiff = TRUE)) expect_visible(fgrowth(data, -1:-2, 1, g, td, logdiff = TRUE)) expect_visible(fgrowth(data, 0, 1, g, td, logdiff = TRUE)) expect_visible(fgrowth(data, -2:2, 2, g, td, logdiff = TRUE)) expect_visible(fgrowth(data, 1:2, 2, g, td, logdiff = TRUE)) expect_visible(fgrowth(data, -1:-2, 2, g, td, logdiff = TRUE)) expect_visible(fgrowth(data, 0, 2, g, td, logdiff = TRUE)) expect_visible(fgrowth(data, -2:2, 1:2, g, td, logdiff = TRUE)) expect_visible(fgrowth(data, 1:2, 1:2, g, td, logdiff = TRUE)) expect_visible(fgrowth(data, -1:-2, 1:2, g, td, logdiff = TRUE)) expect_visible(fgrowth(data, 0, 1:2, g, td, logdiff = TRUE)) }) test_that("fgrowth with logdiff correctly handles unordered time-series and panel-series computations", { expect_equal(fgrowth(x, -2:2, 1:2, t = 1:100, logdiff = TRUE), fgrowth(x, -2:2, 1:2, logdiff = TRUE)) expect_equal(fgrowth(xNA, -2:2, 1:2, t = 1:100, logdiff = TRUE), fgrowth(xNA, -2:2, 1:2, logdiff = TRUE)) expect_equal(fgrowth(m, -2:2, 1:2, t = seq_along(td), logdiff = TRUE), fgrowth(m, -2:2, 1:2, logdiff = TRUE)) expect_equal(fgrowth(data, -2:2, 1:2, t = seq_along(td), logdiff = TRUE), fgrowth(data, -2:2, 1:2, logdiff = TRUE)) expect_equal(fgrowth(xuo, -2:2, 1:2, t = t2uo, logdiff = TRUE)[o,], unclass(fgrowth(x, -2:2, 1:2, logdiff = TRUE))) expect_equal(fgrowth(xNAuo, -2:2, 1:2, t = t2uo, logdiff = TRUE)[o,], unclass(fgrowth(xNA, -2:2, 1:2, logdiff = TRUE))) expect_equal(fgrowth(muo, -2:2, 1:2, t = t2duo, logdiff = TRUE)[od,], unclass(fgrowth(m, -2:2, 1:2, logdiff = TRUE))) expect_equal(fgrowth(datauo, -2:2, 1:2, t = t2duo, logdiff = TRUE)[od,], fgrowth(data, -2:2, 1:2, logdiff = TRUE)) expect_equal(fgrowth(xuo, -2:2, 1:2, fuo, tuo, logdiff = TRUE)[o,], unclass(fgrowth(x, -2:2, 1:2, f, t, logdiff = TRUE))) expect_equal(fgrowth(xNAuo, -2:2, 1:2, fuo, tuo, logdiff = TRUE)[o,], unclass(fgrowth(xNA, -2:2, 1:2, f, t, logdiff = TRUE))) expect_equal(fgrowth(muo, -2:2, 1:2, guo, tduo, logdiff = TRUE)[od,], unclass(fgrowth(m, -2:2, 1:2, g, td, logdiff = TRUE))) expect_equal(fgrowth(datauo, -2:2, 1:2, guo, tduo, logdiff = TRUE)[od,], fgrowth(data, -2:2, 1:2, g, td, logdiff = TRUE)) }) test_that("fgrowth with logdiff performs numerically stable in ordered computations", { expect_true(all_obj_equal(replicate(50, fgrowth(x, logdiff = TRUE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fgrowth(xNA, logdiff = TRUE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fgrowth(m, logdiff = TRUE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fgrowth(mNA, logdiff = TRUE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fgrowth(data, logdiff = TRUE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fgrowth(dataNA, logdiff = TRUE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fgrowth(x, 1, 1, f, logdiff = TRUE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fgrowth(x, -2:2, 1:2, f, logdiff = TRUE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fgrowth(xNA, 1, 1, f, logdiff = TRUE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fgrowth(xNA, -2:2, 1:2, f, logdiff = TRUE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fgrowth(m, 1, 1, g, logdiff = TRUE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fgrowth(m, -2:2, 1:2, g, logdiff = TRUE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fgrowth(mNA, 1, 1, g, logdiff = TRUE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fgrowth(mNA, -2:2, 1:2, g, logdiff = TRUE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fgrowth(data, 1, 1, g, logdiff = TRUE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fgrowth(data, -2:2, 1:2, g, logdiff = TRUE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fgrowth(dataNA, 1, 1, g, logdiff = TRUE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fgrowth(dataNA, -2:2, 1:2, g, logdiff = TRUE), simplify = FALSE))) }) test_that("fgrowth with logdiff performs numerically stable in unordered computations", { expect_true(all_obj_equal(replicate(50, fgrowth(xuo, t = t2uo, logdiff = TRUE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fgrowth(xNAuo, t = t2uo, logdiff = TRUE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fgrowth(muo, t = t2duo, logdiff = TRUE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fgrowth(datauo, t = t2duo, logdiff = TRUE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fgrowth(xuo, 1, 1, fuo, tuo, logdiff = TRUE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fgrowth(xuo, -2:2, 1:2, fuo, tuo, logdiff = TRUE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fgrowth(muo, 1, 1, guo, tduo, logdiff = TRUE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fgrowth(muo, -2:2, 1:2, guo, tduo, logdiff = TRUE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fgrowth(datauo, 1, 1, guo, tduo, logdiff = TRUE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fgrowth(datauo, -2:2, 1:2, guo, tduo, logdiff = TRUE), simplify = FALSE))) }) options(warn = -1) test_that("fgrowth with logdiff handles special values in the right way", { expect_equal(fgrowth(c(1,NA), logdiff = TRUE), c(NA_real_,NaN)) expect_equal(fgrowth(c(NA,1), logdiff = TRUE), c(NA_real_,NaN)) expect_equal(fgrowth(c(NaN,1), logdiff = TRUE), c(NA_real_,NaN)) expect_equal(fgrowth(c(1,NaN), logdiff = TRUE), c(NA_real_,NaN)) expect_equal(fgrowth(c(Inf,1), logdiff = TRUE), c(NA,-Inf)) # ?? expect_equal(fgrowth(c(1,Inf), logdiff = TRUE), c(NA,Inf)) expect_equal(fgrowth(c(Inf,NA), logdiff = TRUE), c(NA_real_,NaN)) expect_equal(fgrowth(c(NA,Inf), logdiff = TRUE), c(NA_real_,NaN)) expect_equal(fgrowth(c(Inf,-Inf), logdiff = TRUE), c(NA,NaN)) expect_equal(fgrowth(c(-Inf,Inf), logdiff = TRUE), c(NA,NaN)) expect_equal(fgrowth(c(Inf,Inf), logdiff = TRUE), c(NA,NaN)) expect_equal(fgrowth(c(TRUE,TRUE), logdiff = TRUE), c(NA_real_,0)) expect_equal(fgrowth(c(TRUE,FALSE), logdiff = TRUE), c(NA_real_,-Inf)) # ?? expect_equal(fgrowth(c(FALSE,TRUE), logdiff = TRUE), c(NA_real_,Inf)) }) test_that("fgrowth with logdiff produces errors for wrong input", { # wrong type: normally guaranteed by C++ expect_error(fgrowth("a", logdiff = TRUE)) expect_error(fgrowth(NA_character_, logdiff = TRUE)) expect_error(fgrowth(mNAc, logdiff = TRUE)) expect_error(fgrowth(wlddev, logdiff = TRUE)) expect_error(fgrowth(mNAc, f, logdiff = TRUE)) expect_error(fgrowth(x, "1", "2", logdiff = TRUE)) # if n*growth equals or exceeds length(x), should give error expect_error(fgrowth(x,100, logdiff = TRUE)) expect_error(fgrowth(x,1,100, logdiff = TRUE)) expect_error(fgrowth(x,50,2, logdiff = TRUE)) expect_error(fgrowth(x,20,5, logdiff = TRUE)) # if n*growth exceeds average group size, should give error # expect_warning(fgrowth(x,11,1,f, logdiff = TRUE)) -> some fail on i386 # expect_warning(fgrowth(x,1,11,f, logdiff = TRUE)) # expect_warning(fgrowth(x,1,11,f,t, logdiff = TRUE)) # expect_warning(fgrowth(x,11,1,f,t, logdiff = TRUE)) # passing repeated n-values or non-positive or non-consecutive growth values should give error expect_error(fgrowth(x,c(1,1), logdiff = TRUE)) expect_error(fgrowth(x,c(-1,-1), logdiff = TRUE)) expect_error(fgrowth(x,1,c(1,1), logdiff = TRUE)) expect_error(fgrowth(x,1,c(-1,-1), logdiff = TRUE)) expect_error(fgrowth(x,1,2:1, logdiff = TRUE)) expect_error(fgrowth(x,1,0, logdiff = TRUE)) expect_error(fgrowth(x,1,-1, logdiff = TRUE)) expect_error(fgrowth(x,f, logdiff = TRUE)) # common source of error probably is passing the factor in a wrong slot expect_error(fgrowth(x,1,f, logdiff = TRUE)) expect_error(fgrowth(x,c(1,1),1,f, logdiff = TRUE)) expect_error(fgrowth(x,c(1,1),1,f,t, logdiff = TRUE)) expect_error(fgrowth(x,1,c(1,1),f, logdiff = TRUE)) expect_error(fgrowth(x,1,c(1,1),f,t, logdiff = TRUE)) expect_error(fgrowth(x,1,2:1,f, logdiff = TRUE)) expect_error(fgrowth(x,1,2:1,f,t, logdiff = TRUE)) expect_error(fgrowth(x,1,0,f, logdiff = TRUE)) expect_error(fgrowth(x,1,0,f,t, logdiff = TRUE)) expect_error(fgrowth(x,1,-1,f, logdiff = TRUE)) expect_error(fgrowth(x,1,-1,f,t, logdiff = TRUE)) # repeated values or gaps in time-variable should give error expect_error(fgrowth(1:3, t = c(1,1,2), logdiff = TRUE)) expect_error(fgrowth(1:3, t = c(1,2,2), logdiff = TRUE)) expect_error(fgrowth(1:3, t = c(1,2,1), logdiff = TRUE)) expect_error(fgrowth(1:10, g = c(1,1,1,2,2,2,3,3,3,3), t = c(1,1,2,1:3,1:4), logdiff = TRUE)) # This is the only possible statement which does not throw a reteated timevar error because the first C++ index is 0, and omap is also initialized with 0's. expect_error(fgrowth(1:10, g = c(1,1,1,2,2,2,3,3,3,3), t = c(1,1,1,1:3,1:4), logdiff = TRUE)) expect_error(fgrowth(1:10, g = c(1,1,1,2,2,2,3,3,3,3), t = c(1:3,1:3,1,1,3,4), logdiff = TRUE)) expect_error(fgrowth(1:10, g = c(1,1,1,2,2,2,3,3,3,3), t = c(1,2,2,1:3,1:4), logdiff = TRUE)) expect_visible(fgrowth(1:10, g = c(1,1,1,2,2,2,3,3,3,3), t = c(1,2,4,1:3,1:4), logdiff = TRUE)) expect_error(fgrowth(1:10, diff = 2, g = c(1,1,1,2,2,2,3,3,3,3), t = c(1,2,4,1:3,1:4), logdiff = TRUE)) expect_error(fgrowth(1:10, g = c(1,2,1,2,2,2,3,3,3,3), t = c(1:3,1:3,1:4), logdiff = TRUE)) expect_visible(fgrowth(1:10, g = c(1,1,1,2,2,2,3,3,4,3), t = c(1:3,1:3,1:4), logdiff = TRUE)) expect_error(fgrowth(1:10, diff = 2, g = c(1,1,1,2,2,2,3,3,4,3), t = c(1:3,1:3,1:4), logdiff = TRUE)) # The usual stuff: Wrongly sized grouping vectors or time-variables expect_error(fgrowth(1:3, t = 1:2, logdiff = TRUE)) expect_error(fgrowth(1:3, t = 1:4, logdiff = TRUE)) expect_error(fgrowth(1:3, g = 1:2, logdiff = TRUE)) expect_error(fgrowth(1:3, g = 1:4, logdiff = TRUE)) expect_error(fgrowth(1:4, g = c(1,1,2,2), t = c(1,2,1), logdiff = TRUE)) expect_error(fgrowth(1:4, g = c(1,2,2), t = c(1,2,1,2), logdiff = TRUE)) }) # G with logdiff test_that("G with logdiff produces errors for wrong input", { # wrong type: normally guaranteed by C++ expect_error(G("a", logdiff = TRUE)) expect_error(G(NA_character_, logdiff = TRUE)) expect_error(G(mNAc, logdiff = TRUE)) expect_visible(G(wlddev, logdiff = TRUE)) expect_error(G(mNAc, f, logdiff = TRUE)) expect_error(G(x, "1", "2", logdiff = TRUE)) # if n*diff equals or exceeds length(x), should give error expect_error(G(x,100, logdiff = TRUE)) expect_error(G(x,1,100, logdiff = TRUE)) expect_error(G(x,50,2, logdiff = TRUE)) expect_error(G(x,20,5, logdiff = TRUE)) # if n*diff exceeds average group size, should give error # expect_warning(G(x,11,1,f, logdiff = TRUE)) -> Some fail on i386 # expect_warning(G(x,1,11,f, logdiff = TRUE)) # expect_warning(G(x,1,11,f,t, logdiff = TRUE)) # expect_warning(G(x,11,1,f,t, logdiff = TRUE)) # passing repeated n-values or non-positive or non-consecutive diff values should give error expect_error(G(x,c(1,1), logdiff = TRUE)) expect_error(G(x,c(-1,-1), logdiff = TRUE)) expect_error(G(x,1,c(1,1), logdiff = TRUE)) expect_error(G(x,1,c(-1,-1), logdiff = TRUE)) expect_error(G(x,1,2:1, logdiff = TRUE)) expect_error(G(x,1,0, logdiff = TRUE)) expect_error(G(x,1,-1, logdiff = TRUE)) expect_error(G(x,f, logdiff = TRUE)) # common source of error probably is passing the factor in a wrong slot expect_error(G(x,1,f, logdiff = TRUE)) expect_error(G(x,c(1,1),1,f, logdiff = TRUE)) expect_error(G(x,c(1,1),1,f,t, logdiff = TRUE)) expect_error(G(x,1,c(1,1),f, logdiff = TRUE)) expect_error(G(x,1,c(1,1),f,t, logdiff = TRUE)) expect_error(G(x,1,2:1,f, logdiff = TRUE)) expect_error(G(x,1,2:1,f,t, logdiff = TRUE)) expect_error(G(x,1,0,f, logdiff = TRUE)) expect_error(G(x,1,0,f,t, logdiff = TRUE)) expect_error(G(x,1,-1,f, logdiff = TRUE)) expect_error(G(x,1,-1,f,t, logdiff = TRUE)) # repeated values or gaps in time-variable should give error expect_error(G(1:3, t = c(1,1,2), logdiff = TRUE)) expect_error(G(1:3, t = c(1,2,2), logdiff = TRUE)) expect_error(G(1:3, t = c(1,2,1), logdiff = TRUE)) expect_error(G(1:10, g = c(1,1,1,2,2,2,3,3,3,3), t = c(1,1,2,1:3,1:4), logdiff = TRUE)) # This is the only possible statement which does not throw a reteated timevar error because the first C++ index is 0, and omap is also initialized with 0's. expect_error(G(1:10, g = c(1,1,1,2,2,2,3,3,3,3), t = c(1,1,1,1:3,1:4), logdiff = TRUE)) expect_error(G(1:10, g = c(1,1,1,2,2,2,3,3,3,3), t = c(1:3,1:3,1,1,3,4), logdiff = TRUE)) expect_error(G(1:10, g = c(1,1,1,2,2,2,3,3,3,3), t = c(1,2,2,1:3,1:4), logdiff = TRUE)) expect_visible(G(1:10, g = c(1,1,1,2,2,2,3,3,3,3), t = c(1,2,4,1:3,1:4), logdiff = TRUE)) expect_error(G(1:10, diff = 2, g = c(1,1,1,2,2,2,3,3,3,3), t = c(1,2,4,1:3,1:4), logdiff = TRUE)) expect_error(G(1:10, g = c(1,2,1,2,2,2,3,3,3,3), t = c(1:3,1:3,1:4), logdiff = TRUE)) expect_visible(G(1:10, g = c(1,1,1,2,2,2,3,3,4,3), t = c(1:3,1:3,1:4), logdiff = TRUE)) expect_error(G(1:10, diff = 2, g = c(1,1,1,2,2,2,3,3,4,3), t = c(1:3,1:3,1:4), logdiff = TRUE)) # The usual stuff: Wrongly sized grouping vectors or time-variables expect_error(G(1:3, t = 1:2, logdiff = TRUE)) expect_error(G(1:3, t = 1:4, logdiff = TRUE)) expect_error(G(1:3, g = 1:2, logdiff = TRUE)) expect_error(G(1:3, g = 1:4, logdiff = TRUE)) expect_error(G(1:4, g = c(1,1,2,2), t = c(1,2,1), logdiff = TRUE)) expect_error(G(1:4, g = c(1,2,2), t = c(1,2,1,2), logdiff = TRUE)) }) test_that("G.data.frame method with logdiff is foolproof", { expect_visible(G(wlddev, logdiff = TRUE)) expect_visible(G(wlddev, by = wlddev$iso3c, logdiff = TRUE)) expect_error(G(wlddev, t = ~year, logdiff = TRUE)) expect_visible(G(wlddev, 1, 1, wlddev$iso3c, logdiff = TRUE)) expect_visible(G(wlddev, 1,1, ~iso3c, logdiff = TRUE)) expect_error(G(wlddev, 1, ~iso3c, logdiff = TRUE)) expect_visible(G(wlddev, 1, 1, ~iso3c + region, logdiff = TRUE)) expect_visible(G(wlddev, 1,1, wlddev$iso3c, wlddev$year, logdiff = TRUE)) expect_visible(G(wlddev, 1,1, ~iso3c, ~year, logdiff = TRUE)) expect_visible(G(wlddev, cols = 9:12, logdiff = TRUE)) expect_visible(G(wlddev, 1,1,~iso3c, cols = 9:12, logdiff = TRUE)) expect_visible(G(wlddev, 1,1,wlddev$iso3c, wlddev$year, cols = 9:12, logdiff = TRUE)) expect_visible(G(wlddev, 1,1,~iso3c, ~year, cols = 9:12, logdiff = TRUE)) expect_visible(G(wlddev, cols = c("PCGDP","LIFEEX"), logdiff = TRUE)) expect_visible(G(wlddev, 1,1,wlddev$iso3c, cols = c("PCGDP","LIFEEX"), logdiff = TRUE)) expect_visible(G(wlddev, 1,1,~iso3c, cols = c("PCGDP","LIFEEX"), logdiff = TRUE)) expect_visible(G(wlddev, 1,1,wlddev$iso3c, wlddev$year, cols = c("PCGDP","LIFEEX"), logdiff = TRUE)) expect_visible(G(wlddev, 1,1,~iso3c, ~year, cols = c("PCGDP","LIFEEX"), logdiff = TRUE)) expect_error(G(wlddev, cols = NULL, logdiff = TRUE)) expect_error(G(wlddev, 1,1,wlddev$iso3c, cols = NULL, logdiff = TRUE)) expect_error(G(wlddev, 1,1,~iso3c, cols = NULL, logdiff = TRUE)) expect_error(G(wlddev, 1,1,wlddev$iso3c, wlddev$year, cols = NULL, logdiff = TRUE)) expect_error(G(wlddev, 1,1,~iso3c, ~year, cols = NULL, logdiff = TRUE)) expect_error(G(wlddev, cols = 9:14, logdiff = TRUE)) expect_error(G(wlddev, 1,1,~iso3c, ~year, cols = 9:14, logdiff = TRUE)) expect_error(G(wlddev, cols = c("PCGDP","LIFEEX","bla"), logdiff = TRUE)) expect_error(G(wlddev, 1,1,~iso3c, ~year, cols = c("PCGDP","LIFEEX","bla"), logdiff = TRUE)) expect_warning(G(wlddev, w = 4, logdiff = TRUE)) expect_error(G(wlddev, t = "year", logdiff = TRUE)) expect_warning(G(wlddev, g = ~year2, logdiff = TRUE)) expect_error(G(wlddev, t = ~year + region, logdiff = TRUE)) expect_error(G(wlddev, data, logdiff = TRUE)) expect_error(G(wlddev, 1,1,"iso3c", logdiff = TRUE)) expect_error(G(wlddev, 1,1,~iso3c2, logdiff = TRUE)) expect_error(G(wlddev, 1,1,~iso3c + bla, logdiff = TRUE)) expect_error(G(wlddev, 1,1,t = rnorm(30), logdiff = TRUE)) expect_warning(G(wlddev, 1,1,g = rnorm(30), logdiff = TRUE)) expect_error(G(wlddev, 1,1,mtcars$mpg, 1:29, logdiff = TRUE)) expect_error(G(wlddev, 1,1,mtcars$mpg, mtcars$cyl, logdiff = TRUE)) # this gives a repeated values error first because length(g) == length(t) expect_error(G(wlddev,1,1, ~iso3c2, ~year2, logdiff = TRUE)) expect_error(G(wlddev, cols = ~bla, logdiff = TRUE)) expect_visible(G(wlddev, 1,1,wlddev$iso3c, ~year, cols = 9:12, logdiff = TRUE)) expect_visible(G(wlddev, 1,1,~iso3c, wlddev$year, cols = 9:12, logdiff = TRUE)) expect_error(G(wlddev, 1,1,wlddev$iso3c, ~year + bla, cols = 9:12, logdiff = TRUE)) expect_error(G(wlddev, 2,1,~iso3c3, ~year, cols = 9:12, logdiff = TRUE)) expect_error(G(wlddev, cols = c("PC3GDP","LIFEEX"), logdiff = TRUE)) }) options(warn = 1) collapse/tests/testthat/test-whichv.R0000644000176200001440000001002314166303745017453 0ustar liggesuserscontext("anyv, allv, whichv, setv, copyv etc.") # d <- replace_NA(wlddev, cols = 9:13) test_that("whichv works well", { expect_identical(whichv(wlddev$country, "Chad"), which(wlddev$country == "Chad")) expect_identical(whichv(wlddev$country, "Chad", invert = TRUE), which(wlddev$country != "Chad")) expect_identical(whichNA(wlddev$PCGDP), which(is.na(wlddev$PCGDP))) expect_identical(whichNA(wlddev$PCGDP, invert = TRUE), which(!is.na(wlddev$PCGDP))) expect_identical(whichv(is.na(wlddev$PCGDP), FALSE), which(!is.na(wlddev$PCGDP))) }) test_that("anyv, allv and whichv work properly", { for(i in seq_along(wlddev)) { vec <- .subset2(wlddev, i) v <- vec[trunc(runif(1L, 1L, length(vec)))] if(is.na(v)) v <- flast(vec) expect_identical(which(vec == v), whichv(vec, v)) if(!anyNA(vec)) expect_identical(which(vec != v), whichv(vec, v, TRUE)) expect_identical(all(vec == v), allv(vec, v)) expect_identical(any(vec == v), anyv(vec, v)) vecNA <- is.na(vec) expect_identical(which(vecNA), whichNA(vec)) expect_identical(which(!vecNA), whichNA(vec, TRUE)) expect_identical(all(vecNA), allNA(vec)) expect_identical(any(vecNA), anyNA(vec)) } expect_true(allv(rep(0.0004, 1000), 0.0004)) expect_false(allv(rep(0.0004, 1000), 0.0005)) }) wldcopy <- data.table::copy(wlddev) mtccopy <- data.table::copy(mtcars) test_that("setv and copyv work properly", { for(FUN in list(copyv, setv)) { for(i in seq_along(wlddev)) { # print(i) vec <- .subset2(wlddev, i) v <- vec[trunc(runif(1L, 1L, length(vec)))] r <- vec[trunc(runif(1L, 1L, length(vec)))] if(is.na(v)) v <- flast(vec) vl <- vec == v nvl <- vec != v vna <- is.na(vec) expect_identical(FUN(vec, v, r), replace(vec, vl, r)) expect_identical(FUN(vec, NA, r), replace(vec, vna, r)) expect_error(FUN(vec, vl, r)) expect_error(FUN(vec, 258L, r, vind1 = TRUE)) expect_error(FUN(vec, vl, r, invert = TRUE)) expect_error(FUN(vec, which(nvl), r)) expect_error(FUN(vec, which(vl), r, invert = TRUE, vind1 = TRUE)) expect_error(FUN(vec, which(nvl), r, invert = TRUE)) if(anyNA(vl)) { setv(vl, NA, FALSE) setv(nvl, NA, FALSE) } expect_identical(FUN(vec, v, vec), replace(vec, vl, vec[vl])) expect_identical(FUN(vec, NA, vec), replace(vec, vna, vec[vna])) expect_identical(FUN(vec, vl, vec), replace(vec, vl, vec[vl])) expect_identical(FUN(vec, vl, vec, invert = TRUE), replace(vec, nvl, vec[nvl])) expect_identical(FUN(vec, which(vl), vec), replace(vec, vl, vec[vl])) expect_identical(FUN(vec, which(nvl), vec), replace(vec, nvl, vec[nvl])) expect_error(FUN(vec, which(nvl), vec, invert = TRUE)) } replr <- function(x, i, v) { x[i, ] <- v x } expect_identical(FUN(mtcars, 1, 2), replace(mtcars, mtcars == 1, 2)) expect_identical(FUN(mtcars, 1, 2, invert = TRUE), replace(mtcars, mtcars != 1, 2)) if(identical(FUN, copyv)) expect_visible(FUN(mtcars, 1, mtcars$mpg, invert = TRUE)) else expect_invisible(FUN(mtcars, 1, mtcars$mpg, invert = TRUE)) expect_identical(FUN(mtcars, 23L, mtcars$mpg, vind1 = TRUE), replr(mtcars, 23L, mtcars$mpg[23L])) expect_identical(FUN(mtcars, 3:6, mtcars$mpg), replr(mtcars, 3:6, mtcars$mpg[3:6])) expect_identical(FUN(mtcars, 23L, mtcars, vind1 = TRUE), replr(mtcars, 23L, mtcars[23L, ])) expect_identical(FUN(mtcars, 3:6, mtcars), replr(mtcars, 3:6, mtcars[3:6, ])) expect_error(FUN(mtcars, 23, mtcars$mpg[4:10])) expect_warning(FUN(mtcars, 23, mtcars[4:10])) expect_error(FUN(mtcars, 23L, mtcars$mpg[4:10], vind1 = TRUE)) expect_warning(FUN(mtcars, 23L, mtcars[4:10], vind1 = TRUE)) expect_error(FUN(mtcars, 3:6, mtcars$mpg[4:10])) expect_warning(FUN(mtcars, 3:6, mtcars[4:10])) if(identical(FUN, copyv)) { expect_identical(wlddev, wldcopy) expect_identical(mtcars, mtccopy) } } }) wlddev <- wldcopy mtcars <- mtccopy collapse/tests/testthat/test-fscale-STD.R0000644000176200001440000017623114167360577020075 0ustar liggesuserscontext("fscale / STD") bsum <- base::sum # TODO: Still a few uneccessary infinity values generated with weights when the sd is null. search replace_Inf to find them. # rm(list = ls()) set.seed(101) x <- rnorm(100) w <- abs(100*rnorm(100)) wdat <- abs(100*rnorm(32)) xNA <- x wNA <- w wdatNA <- wdat xNA[sample.int(100,20)] <- NA wNA[sample.int(100,20)] <- NA wdatNA[sample.int(32, 5)] <- NA f <- as.factor(rep(1:10, each = 10)) g <- as.factor(rep(c(1,2,2,3,3,3,4,4,4,4,5,5,5,5,5,7,7,7,7,7,7,7,10,10,10,10,10,10,10,10,10,10))) mtcNA <- na_insert(mtcars) mtcNA[1,1] <- NA # single group NA !! m <- as.matrix(mtcars) mNA <- as.matrix(mtcNA) mNAc <- mNA storage.mode(mNAc) <- "character" bscale <- function(x, na.rm = FALSE, mean = 0, sd = 1) { if(na.rm || !anyNA(x)) `attributes<-`(drop(base::scale(x)), NULL) * sd + mean else rep(NA_real_, length(x)) } # NOTE: This is what fscale currently does: If missing values, compute weighted mean and sd on available obs, and scale x using it. but don't insert aditional missing values in x for missing weights .. wbscale <- function(x, w, na.rm = FALSE, mean = 0, sd = 1) { if(na.rm) { x2 <- x cc <- complete.cases(x, w) x <- x[cc] # if(length(x) < 2L) return(rep(NA_real_, length(x2))) # wbscale(NA, 1, na.rm = TRUE) gives length 0 if(length(x) < 2L || all(x[1L] == x[-1L])) return(rep(NA_real_, length(x2))) w <- w[cc] } else { if(length(x) < 2L) return(NA_real_) ck <- all(x[1L] == x[-1L]) if(is.na(ck) || all(ck)) return(rep(NA_real_, length(x))) } sw <- bsum(w) wm <- bsum(w * x) / sw xdm <- x - wm wsd <- sqrt(bsum(w * xdm^2) / (sw - 1)) / sd if(!na.rm) return(xdm / wsd + mean) return((x2 - wm) / wsd + mean) } rnBY <- function(x, ...) { if(is.list(x) || is.array(x)) return(setRownames(BY(x, ...), rownames(x))) BY(x, ...) } wBY <- function(x, f, FUN, w, ...) { if(is.atomic(x) && !is.array(x)) return(unlist(Map(FUN, split(x, f), split(w, f), ...), use.names = FALSE)) wspl <- split(w, f) if(is.atomic(x)) return(dapply(x, function(xi) unlist(Map(FUN, split(xi, f), wspl, ...), use.names = FALSE))) qDF(dapply(x, function(xi) unlist(Map(FUN, split(xi, f), wspl, ...), use.names = FALSE), return = "matrix")) } test_that("fscale performs like bscale", { expect_equal(fscale(NA), as.double(bscale(NA))) expect_equal(fscale(NA, na.rm = FALSE), as.double(bscale(NA))) expect_equal(fscale(1), bscale(1, na.rm = TRUE)) expect_equal(fscale(1:3), bscale(1:3, na.rm = TRUE)) expect_equal(fscale(-1:1), bscale(-1:1, na.rm = TRUE)) expect_equal(fscale(1, na.rm = FALSE), bscale(1)) expect_equal(fscale(1:3, na.rm = FALSE), bscale(1:3)) expect_equal(fscale(-1:1, na.rm = FALSE), bscale(-1:1)) expect_equal(fscale(x), bscale(x, na.rm = TRUE)) expect_equal(fscale(x, na.rm = FALSE), bscale(x)) expect_equal(fscale(xNA, na.rm = FALSE), bscale(xNA)) expect_equal(fscale(xNA), bscale(xNA, na.rm = TRUE)) expect_equal(qM(fscale(mtcars)), fscale(m)) expect_equal(fscale(m), dapply(m, bscale, na.rm = TRUE)) expect_equal(fscale(m, na.rm = FALSE), dapply(m, bscale)) expect_equal(fscale(mNA, na.rm = FALSE), dapply(mNA, bscale)) expect_equal(fscale(mNA), dapply(mNA, bscale, na.rm = TRUE)) expect_equal(fscale(mtcars), dapply(mtcars, bscale, na.rm = TRUE)) expect_equal(fscale(mtcars, na.rm = FALSE), dapply(mtcars, bscale)) expect_equal(fscale(mtcNA, na.rm = FALSE), dapply(mtcNA, bscale)) expect_equal(fscale(mtcNA), dapply(mtcNA, bscale, na.rm = TRUE)) expect_equal(fscale(x, f), BY(x, f, bscale, na.rm = TRUE, use.g.names = FALSE)) expect_equal(fscale(x, f, na.rm = FALSE), BY(x, f, bscale, use.g.names = FALSE)) expect_equal(fscale(xNA, f, na.rm = FALSE), BY(xNA, f, bscale, use.g.names = FALSE)) expect_equal(fscale(xNA, f), BY(xNA, f, bscale, na.rm = TRUE, use.g.names = FALSE)) expect_equal(fscale(m, g), rnBY(m, g, bscale, na.rm = TRUE, use.g.names = FALSE)) expect_equal(fscale(m, g, na.rm = FALSE), rnBY(m, g, bscale, use.g.names = FALSE)) expect_equal(fscale(mNA, g, na.rm = FALSE), rnBY(mNA, g, bscale, use.g.names = FALSE)) expect_equal(fscale(mNA, g), rnBY(mNA, g, bscale, na.rm = TRUE, use.g.names = FALSE)) expect_equal(fscale(mtcars, g), rnBY(mtcars, g, bscale, na.rm = TRUE, use.g.names = FALSE)) expect_equal(fscale(mtcars, g, na.rm = FALSE), rnBY(mtcars, g, bscale, use.g.names = FALSE)) expect_equal(fscale(mtcNA, g, na.rm = FALSE), rnBY(mtcNA, g, bscale, use.g.names = FALSE)) expect_equal(fscale(mtcNA, g), rnBY(mtcNA, g, bscale, na.rm = TRUE, use.g.names = FALSE)) }) su <- function(x) if(is.null(dim(x))) `attributes<-`(qsu.default(x)[2:3], NULL) else `attributes<-`(qsu(x)[,2:3], NULL) suby <- function(x, f) if(is.null(dim(x))) `attributes<-`(qsu.default(x, f)[, 2:3], NULL) else `attributes<-`(qsu(x, f)[,2:3,], NULL) miss <- unname(rep(ifelse(dapply(mNA, anyNA), NA_real_, 0), 2)) test_that("Unweighted customized scaling works as intended", { expect_equal(su(fscale(x, mean = 5.1, sd = 3.9)), c(5.1, 3.9)) expect_equal(su(fscale(x, mean = 5.1, sd = 3.9, na.rm = FALSE)), c(5.1, 3.9)) expect_equal(su(fscale(xNA, mean = 5.1, sd = 3.9, na.rm = FALSE)), c(NaN, NA)) expect_equal(su(fscale(xNA, mean = 5.1, sd = 3.9)), c(5.1, 3.9)) expect_equal(qM(fscale(mtcars, mean = 5.1, sd = 3.9)), fscale(m, mean = 5.1, sd = 3.9)) expect_equal(su(fscale(m, mean = 5.1, sd = 3.9)), rep(c(5.1, 3.9), c(11, 11))) expect_equal(su(fscale(m, mean = 5.1, sd = 3.9, na.rm = FALSE)), rep(c(5.1, 3.9), c(11, 11))) expect_equal(su(fscale(mNA, mean = 5.1, sd = 3.9, na.rm = FALSE)), rep(c(5.1, 3.9), c(11, 11))+miss) expect_equal(su(fscale(mNA, mean = 5.1, sd = 3.9)), rep(c(5.1, 3.9), c(11, 11))) expect_equal(suby(fscale(x, f, mean = 5.1, sd = 3.9), f), rep(c(5.1, 3.9), rep(fnlevels(f), 2))) expect_equal(suby(fscale(x, f, mean = 5.1, sd = 3.9, na.rm = FALSE), f), rep(c(5.1, 3.9), rep(fnlevels(f), 2))) expect_equal(suby(fscale(xNA, f, mean = 5.1, sd = 3.9), f), rep(c(5.1, 3.9), rep(fnlevels(f), 2))) }) test_that("Unweighted customized scaling works like bscale (defined above)", { expect_equal(fscale(x, mean = 5.1, sd = 3.9), bscale(x, na.rm = TRUE, mean = 5.1, sd = 3.9)) expect_equal(fscale(x, mean = 5.1, sd = 3.9, na.rm = FALSE), bscale(x, mean = 5.1, sd = 3.9)) expect_equal(fscale(xNA, mean = 5.1, sd = 3.9, na.rm = FALSE), bscale(xNA, mean = 5.1, sd = 3.9)) expect_equal(fscale(xNA, mean = 5.1, sd = 3.9), bscale(xNA, na.rm = TRUE, mean = 5.1, sd = 3.9)) expect_equal(qM(fscale(mtcars, mean = 5.1, sd = 3.9)), fscale(m, mean = 5.1, sd = 3.9)) expect_equal(fscale(m, mean = 5.1, sd = 3.9), dapply(m, bscale, na.rm = TRUE, mean = 5.1, sd = 3.9)) expect_equal(fscale(m, mean = 5.1, sd = 3.9, na.rm = FALSE), dapply(m, bscale, mean = 5.1, sd = 3.9)) expect_equal(fscale(mNA, mean = 5.1, sd = 3.9, na.rm = FALSE), dapply(mNA, bscale, mean = 5.1, sd = 3.9)) expect_equal(fscale(mNA, mean = 5.1, sd = 3.9), dapply(mNA, bscale, na.rm = TRUE, mean = 5.1, sd = 3.9)) expect_equal(fscale(x, f, mean = 5.1, sd = 3.9), BY(x, f, bscale, na.rm = TRUE, use.g.names = FALSE, mean = 5.1, sd = 3.9)) expect_equal(fscale(x, f, mean = 5.1, sd = 3.9, na.rm = FALSE), BY(x, f, bscale, use.g.names = FALSE, mean = 5.1, sd = 3.9)) expect_equal(fscale(xNA, f, mean = 5.1, sd = 3.9, na.rm = FALSE), BY(xNA, f, bscale, use.g.names = FALSE, mean = 5.1, sd = 3.9)) expect_equal(fscale(xNA, f, mean = 5.1, sd = 3.9), BY(xNA, f, bscale, na.rm = TRUE, use.g.names = FALSE, mean = 5.1, sd = 3.9)) expect_equal(fscale(m, g, mean = 5.1, sd = 3.9), rnBY(m, g, bscale, na.rm = TRUE, use.g.names = FALSE, mean = 5.1, sd = 3.9)) expect_equal(fscale(m, g, mean = 5.1, sd = 3.9, na.rm = FALSE), rnBY(m, g, bscale, use.g.names = FALSE, mean = 5.1, sd = 3.9)) expect_equal(fscale(mNA, g, mean = 5.1, sd = 3.9, na.rm = FALSE), rnBY(mNA, g, bscale, use.g.names = FALSE, mean = 5.1, sd = 3.9)) expect_equal(fscale(mNA, g, mean = 5.1, sd = 3.9), rnBY(mNA, g, bscale, na.rm = TRUE, use.g.names = FALSE, mean = 5.1, sd = 3.9)) expect_equal(fscale(mtcars, g, mean = 5.1, sd = 3.9), rnBY(mtcars, g, bscale, na.rm = TRUE, use.g.names = FALSE, mean = 5.1, sd = 3.9)) expect_equal(fscale(mtcars, g, mean = 5.1, sd = 3.9, na.rm = FALSE), rnBY(mtcars, g, bscale, use.g.names = FALSE, mean = 5.1, sd = 3.9)) expect_equal(fscale(mtcNA, g, mean = 5.1, sd = 3.9, na.rm = FALSE), rnBY(mtcNA, g, bscale, use.g.names = FALSE, mean = 5.1, sd = 3.9)) expect_equal(fscale(mtcNA, g, mean = 5.1, sd = 3.9), rnBY(mtcNA, g, bscale, na.rm = TRUE, use.g.names = FALSE, mean = 5.1, sd = 3.9)) }) test_that("Unweighted customized scaling special cases perform as intended ", { # No mean / centering expect_equal(fscale(x, mean = FALSE, sd = 3.9), bscale(x, na.rm = TRUE, mean = fmean(x), sd = 3.9)) expect_equal(fscale(x, mean = FALSE, sd = 3.9, na.rm = FALSE), bscale(x, mean = fmean(x), sd = 3.9)) expect_equal(fscale(xNA, mean = FALSE, sd = 3.9, na.rm = FALSE), bscale(xNA, mean = fmean(xNA), sd = 3.9)) expect_equal(fscale(xNA, mean = FALSE, sd = 3.9), bscale(xNA, na.rm = TRUE, mean = fmean(xNA), sd = 3.9)) expect_equal(qM(fscale(mtcars, mean = FALSE, sd = 3.9)), fscale(m, mean = FALSE, sd = 3.9)) expect_equal(fmean(fscale(mtcars, mean = FALSE, sd = 3.9)), fmean(mtcars)) expect_equal(unname(fsd(fscale(mtcars, mean = FALSE, sd = 3.9))), rep(3.9, length(mtcars))) expect_equal(fscale(x, f, mean = FALSE), BY(x, f, bscale, na.rm = TRUE, use.g.names = FALSE) + B(x, f)) expect_equal(fscale(x, f, na.rm = FALSE, mean = FALSE), BY(x, f, bscale, use.g.names = FALSE) + B(x, f)) expect_equal(fscale(xNA, f, na.rm = FALSE, mean = FALSE), BY(xNA, f, bscale, use.g.names = FALSE) + B(xNA, f)) expect_equal(fscale(xNA, f, mean = FALSE), BY(xNA, f, bscale, na.rm = TRUE, use.g.names = FALSE) + B(xNA, f)) expect_equal(fscale(m, g, mean = FALSE), rnBY(m, g, bscale, na.rm = TRUE, use.g.names = FALSE) + B(m, g)) expect_equal(fscale(m, g, na.rm = FALSE, mean = FALSE), rnBY(m, g, bscale, use.g.names = FALSE) + B(m, g)) expect_equal(fscale(mNA, g, na.rm = FALSE, mean = FALSE), rnBY(mNA, g, bscale, use.g.names = FALSE) + B(mNA, g)) expect_equal(fscale(mNA, g, mean = FALSE), rnBY(mNA, g, bscale, na.rm = TRUE, use.g.names = FALSE) + B(mNA, g)) expect_equal(fscale(mtcars, g, mean = FALSE), rnBY(mtcars, g, bscale, na.rm = TRUE, use.g.names = FALSE) + B(mtcars, g)) expect_equal(fscale(mtcars, g, na.rm = FALSE, mean = FALSE), rnBY(mtcars, g, bscale, use.g.names = FALSE) + B(mtcars, g)) expect_equal(fscale(mtcNA, g, na.rm = FALSE, mean = FALSE), rnBY(mtcNA, g, bscale, use.g.names = FALSE) + B(mtcNA, g)) expect_equal(fscale(mtcNA, g, mean = FALSE), rnBY(mtcNA, g, bscale, na.rm = TRUE, use.g.names = FALSE)+ B(mtcNA, g)) # Centering on overall mean expect_equal(fscale(x, f, mean = "overall.mean"), BY(x, f, bscale, na.rm = TRUE, use.g.names = FALSE) + ave(x)) expect_equal(fscale(x, f, na.rm = FALSE, mean = "overall.mean"), BY(x, f, bscale, use.g.names = FALSE) + ave(x)) # expect_equal(fscale(xNA, f, na.rm = FALSE, mean = "overall.mean"), BY(xNA, f, bscale, use.g.names = FALSE) + B(xNA)) # Not the same !! expect_equal(fscale(xNA, f, mean = "overall.mean"), BY(xNA, f, bscale, na.rm = TRUE, use.g.names = FALSE) + B(xNA)) expect_equal(fscale(m, g, mean = "overall.mean"), rnBY(m, g, bscale, na.rm = TRUE, use.g.names = FALSE) + B(m)) expect_equal(fscale(m, g, na.rm = FALSE, mean = "overall.mean"), rnBY(m, g, bscale, use.g.names = FALSE) + B(m)) # expect_equal(fscale(mNA, g, na.rm = FALSE, mean = "overall.mean"), rnBY(mNA, g, bscale, use.g.names = FALSE) + B(mNA)) expect_equal(fscale(mNA, g, mean = "overall.mean"), rnBY(mNA, g, bscale, na.rm = TRUE, use.g.names = FALSE) + B(mNA)) expect_equal(fscale(mtcars, g, mean = "overall.mean"), rnBY(mtcars, g, bscale, na.rm = TRUE, use.g.names = FALSE) + B(mtcars)) expect_equal(fscale(mtcars, g, na.rm = FALSE, mean = "overall.mean"), rnBY(mtcars, g, bscale, use.g.names = FALSE) + B(mtcars)) # expect_equal(fscale(mtcNA, g, na.rm = FALSE, mean = "overall.mean"), rnBY(mtcNA, g, bscale, use.g.names = FALSE) + B(mtcNA)) expect_equal(fscale(mtcNA, g, mean = "overall.mean"), rnBY(mtcNA, g, bscale, na.rm = TRUE, use.g.names = FALSE)+ B(mtcNA)) # Scaling by within-sd expect_equal(fscale(x, f, sd = "within.sd"), BY(x, f, bscale, na.rm = TRUE, use.g.names = FALSE) * fsd(W(x, f))) expect_equal(fscale(x, f, na.rm = FALSE, sd = "within.sd"), BY(x, f, bscale, use.g.names = FALSE) * fsd(W(x, f))) # expect_equal(fscale(xNA, f, na.rm = FALSE, sd = "within.sd"), BY(xNA, f, bscale, use.g.names = FALSE) * fsd(W(xNA, f))) # Not the same !! expect_equal(fscale(xNA, f, sd = "within.sd"), BY(xNA, f, bscale, na.rm = TRUE, use.g.names = FALSE) * fsd(W(xNA, f))) expect_equal(fscale(m, g, sd = "within.sd"), rnBY(m, g, bscale, na.rm = TRUE, use.g.names = FALSE) * TRA(m,fsd(W(m, g)),1L)) expect_equal(fscale(m, g, na.rm = FALSE, sd = "within.sd"), rnBY(m, g, bscale, use.g.names = FALSE) * TRA(m,fsd(W(m, g)),1L)) # expect_equal(fscale(mNA, g, na.rm = FALSE, sd = "within.sd"), rnBY(mNA, g, bscale, use.g.names = FALSE) * TRA(mNA,fsd(W(mNA, g)),1L)) expect_equal(fscale(mNA, g, sd = "within.sd"), rnBY(mNA, g, bscale, na.rm = TRUE, use.g.names = FALSE) * TRA(mNA,fsd(W(mNA, g)),1L)) expect_equal(fscale(mtcars, g, sd = "within.sd"), rnBY(mtcars, g, bscale, na.rm = TRUE, use.g.names = FALSE) * TRA(mtcars,fsd(W(mtcars, g)),1L)) expect_equal(fscale(mtcars, g, na.rm = FALSE, sd = "within.sd"), rnBY(mtcars, g, bscale, use.g.names = FALSE) * TRA(mtcars,fsd(W(mtcars, g)),1L)) # expect_equal(fscale(mtcNA, g, na.rm = FALSE, sd = "within.sd"), rnBY(mtcNA, g, bscale, use.g.names = FALSE) * TRA(mtcNA,fsd(W(mtcNA, g)),1L)) expect_equal(fscale(mtcNA, g, sd = "within.sd"), rnBY(mtcNA, g, bscale, na.rm = TRUE, use.g.names = FALSE) * TRA(mtcNA,fsd(W(mtcNA, g)),1L)) # Centering on overall mean and scaling by within-sd expect_equal(fscale(x, f, mean = "overall.mean", sd = "within.sd"), BY(x, f, bscale, na.rm = TRUE, use.g.names = FALSE) * fsd(W(x, f)) + ave(x)) expect_equal(fscale(x, f, na.rm = FALSE, mean = "overall.mean", sd = "within.sd"), BY(x, f, bscale, use.g.names = FALSE) * fsd(W(x, f)) + ave(x)) # expect_equal(fscale(xNA, f, na.rm = FALSE, mean = "overall.mean", sd = "within.sd"), BY(xNA, f, bscale, use.g.names = FALSE) * fsd(W(xNA, f)) + B(xNA)) # Not the same !! expect_equal(fscale(xNA, f, mean = "overall.mean", sd = "within.sd"), BY(xNA, f, bscale, na.rm = TRUE, use.g.names = FALSE) * fsd(W(xNA, f)) + B(xNA)) expect_equal(fscale(m, g, mean = "overall.mean", sd = "within.sd"), rnBY(m, g, bscale, na.rm = TRUE, use.g.names = FALSE) * TRA(m,fsd(W(m, g)),1L) + B(m)) expect_equal(fscale(m, g, na.rm = FALSE, mean = "overall.mean", sd = "within.sd"), rnBY(m, g, bscale, use.g.names = FALSE) * TRA(m,fsd(W(m, g)),1L) + B(m)) # expect_equal(fscale(mNA, g, na.rm = FALSE, mean = "overall.mean", sd = "within.sd"), rnBY(mNA, g, bscale, use.g.names = FALSE) * TRA(mNA,fsd(W(mNA, g)),1L) + B(mNA)) expect_equal(fscale(mNA, g, mean = "overall.mean", sd = "within.sd"), rnBY(mNA, g, bscale, na.rm = TRUE, use.g.names = FALSE) * TRA(mNA,fsd(W(mNA, g)),1L) + B(mNA)) expect_equal(fscale(mtcars, g, mean = "overall.mean", sd = "within.sd"), rnBY(mtcars, g, bscale, na.rm = TRUE, use.g.names = FALSE) * TRA(mtcars,fsd(W(mtcars, g)),1L) + B(mtcars)) expect_equal(fscale(mtcars, g, na.rm = FALSE, mean = "overall.mean", sd = "within.sd"), rnBY(mtcars, g, bscale, use.g.names = FALSE) * TRA(mtcars,fsd(W(mtcars, g)),1L) + B(mtcars)) # expect_equal(fscale(mtcNA, g, na.rm = FALSE, mean = "overall.mean", sd = "within.sd"), rnBY(mtcNA, g, bscale, use.g.names = FALSE) * TRA(mtcNA,fsd(W(mtcNA, g)),1L) + B(mtcNA)) expect_equal(fscale(mtcNA, g, mean = "overall.mean", sd = "within.sd"), rnBY(mtcNA, g, bscale, na.rm = TRUE, use.g.names = FALSE) * TRA(mtcNA,fsd(W(mtcNA, g)),1L) + B(mtcNA)) }) # Still test weighted special cases ... test_that("fscale performs like fscale with unit weights", { expect_equal(fscale(NA), fscale(NA, w = 1)) expect_equal(fscale(NA, na.rm = FALSE), fscale(NA, w = 1, na.rm = FALSE)) expect_equal(fscale(1), fscale(1, w = 1)) expect_equal(fscale(1:3), fscale(1:3, w = rep(1,3))) expect_equal(fscale(-1:1), fscale(-1:1, w = rep(1,3))) expect_equal(fscale(1, na.rm = FALSE), fscale(1, w = 1, na.rm = FALSE)) expect_equal(fscale(1:3, na.rm = FALSE), fscale(1:3, w = rep(1, 3), na.rm = FALSE)) expect_equal(fscale(-1:1, na.rm = FALSE), fscale(-1:1, w = rep(1, 3), na.rm = FALSE)) expect_equal(fscale(x), fscale(x, w = rep(1,100))) expect_equal(fscale(x, na.rm = FALSE), fscale(x, w = rep(1, 100), na.rm = FALSE)) expect_equal(fscale(xNA, na.rm = FALSE), fscale(xNA, w = rep(1, 100), na.rm = FALSE)) expect_equal(fscale(xNA), fscale(xNA, w = rep(1, 100))) expect_equal(fscale(m), fscale(m, w = rep(1, 32))) expect_equal(fscale(m, na.rm = FALSE), fscale(m, w = rep(1, 32), na.rm = FALSE)) expect_equal(fscale(mNA, na.rm = FALSE), fscale(mNA, w = rep(1, 32), na.rm = FALSE)) expect_equal(fscale(mNA), fscale(mNA, w = rep(1, 32))) expect_equal(fscale(mtcars), fscale(mtcars, w = rep(1, 32))) expect_equal(fscale(mtcars, na.rm = FALSE), fscale(mtcars, w = rep(1, 32), na.rm = FALSE)) expect_equal(fscale(mtcNA, na.rm = FALSE), fscale(mtcNA, w = rep(1, 32), na.rm = FALSE)) expect_equal(fscale(mtcNA), fscale(mtcNA, w = rep(1, 32))) expect_equal(fscale(x, f), fscale(x, f, rep(1,100))) expect_equal(fscale(x, f, na.rm = FALSE), fscale(x, f, rep(1,100), na.rm = FALSE)) expect_equal(fscale(xNA, f, na.rm = FALSE), fscale(xNA, f, rep(1,100), na.rm = FALSE)) expect_equal(fscale(xNA, f), fscale(xNA, f, rep(1,100))) expect_equal(fscale(m, g), fscale(m, g, rep(1,32))) expect_equal(fscale(m, g, na.rm = FALSE), fscale(m, g, rep(1,32), na.rm = FALSE)) expect_equal(fscale(mNA, g, na.rm = FALSE), fscale(mNA, g, rep(1,32), na.rm = FALSE)) expect_equal(fscale(mNA, g), fscale(mNA, g, rep(1,32))) expect_equal(fscale(mtcars, g), fscale(mtcars, g, rep(1,32))) expect_equal(fscale(mtcars, g, na.rm = FALSE), fscale(mtcars, g, rep(1,32), na.rm = FALSE)) expect_equal(fscale(mtcNA, g, na.rm = FALSE), fscale(mtcNA, g, rep(1,32), na.rm = FALSE)) expect_equal(fscale(mtcNA, g), fscale(mtcNA, g, rep(1,32))) }) test_that("fscale with weights performs like wbscale (defined above)", { # complete weights expect_equal(fscale(NA, w = 1), wbscale(NA, 1)) expect_equal(fscale(NA, w = 1, na.rm = FALSE), wbscale(NA, 1)) expect_equal(fscale(1, w = 1), wbscale(1, w = 1)) expect_equal(fscale(1:3, w = 1:3), wbscale(1:3, 1:3)) expect_equal(fscale(-1:1, w = 1:3), wbscale(-1:1, 1:3)) expect_equal(fscale(1, w = 1, na.rm = FALSE), wbscale(1, 1)) expect_equal(fscale(1:3, w = c(0.99,3454,1.111), na.rm = FALSE), wbscale(1:3, c(0.99,3454,1.111))) expect_equal(fscale(-1:1, w = 1:3, na.rm = FALSE), wbscale(-1:1, 1:3)) expect_equal(fscale(x, w = w), wbscale(x, w)) expect_equal(fscale(x, w = w, na.rm = FALSE), wbscale(x, w)) expect_equal(fscale(xNA, w = w, na.rm = FALSE), wbscale(xNA, w)) expect_equal(fscale(xNA, w = w), wbscale(xNA, w, na.rm = TRUE)) expect_equal(qM(fscale(mtcars, w = wdat)), fscale(m, w = wdat)) expect_equal(fscale(m, w = wdat), dapply(m, wbscale, wdat, na.rm = TRUE)) expect_equal(fscale(m, w = wdat, na.rm = FALSE), dapply(m, wbscale, wdat)) expect_equal(fscale(mNA, w = wdat, na.rm = FALSE), dapply(mNA, wbscale, wdat)) expect_equal(fscale(mNA, w = wdat), dapply(mNA, wbscale, wdat, na.rm = TRUE)) expect_equal(fscale(mtcars, w = wdat), dapply(mtcars, wbscale, wdat, na.rm = TRUE)) expect_equal(fscale(mtcars, w = wdat, na.rm = FALSE), dapply(mtcars, wbscale, wdat)) expect_equal(fscale(mtcNA, w = wdat, na.rm = FALSE), dapply(mtcNA, wbscale, wdat)) expect_equal(fscale(mtcNA, w = wdat), dapply(mtcNA, wbscale, wdat, na.rm = TRUE)) expect_equal(fscale(x, f, w), wBY(x, f, wbscale, w)) expect_equal(fscale(x, f, w, na.rm = FALSE), wBY(x, f, wbscale, w)) expect_equal(fscale(xNA, f, w, na.rm = FALSE), wBY(xNA, f, wbscale, w)) expect_equal(fscale(xNA, f, w), wBY(xNA, f, wbscale, w, na.rm = TRUE)) expect_equal(fscale(m, g, wdat), wBY(m, g, wbscale, wdat, na.rm = TRUE)) expect_equal(fscale(m, g, wdat, na.rm = FALSE), wBY(m, g, wbscale, wdat)) expect_equal(fscale(mNA, g, wdat, na.rm = FALSE), wBY(mNA, g, wbscale, wdat)) expect_equal(fscale(mNA, g, wdat), wBY(mNA, g, wbscale, wdat, na.rm = TRUE)) expect_equal(fscale(mtcars, g, wdat), wBY(mtcars, g, wbscale, wdat)) expect_equal(fscale(mtcars, g, wdat, na.rm = FALSE), wBY(mtcars, g, wbscale, wdat)) expect_equal(fscale(mtcNA, g, wdat, na.rm = FALSE), wBY(mtcNA, g, wbscale, wdat)) expect_equal(fscale(mtcNA, g, wdat), wBY(mtcNA, g, wbscale, wdat, na.rm = TRUE)) # missing weights expect_equal(fscale(NA, w = NA), wbscale(NA, NA)) expect_equal(fscale(NA, w = NA, na.rm = FALSE), wbscale(NA, NA)) expect_equal(fscale(1, w = NA), wbscale(1, w = NA)) expect_equal(fscale(1:3, w = c(NA,1:2)), wbscale(1:3, c(NA,1:2), na.rm = TRUE)) expect_equal(fscale(-1:1, w = c(NA,1:2)), wbscale(-1:1, c(NA,1:2), na.rm = TRUE)) expect_equal(fscale(1, w = NA, na.rm = FALSE), wbscale(1, NA)) expect_equal(fscale(1:3, w = c(NA,1:2), na.rm = FALSE), wbscale(1:3, c(NA,1:2))) expect_equal(fscale(-1:1, w = c(NA,1:2), na.rm = FALSE), wbscale(-1:1, c(NA,1:2))) expect_equal(fscale(x, w = wNA), wbscale(x, wNA, na.rm = TRUE)) expect_equal(fscale(x, w = wNA, na.rm = FALSE), wbscale(x, wNA)) expect_equal(fscale(xNA, w = wNA, na.rm = FALSE), wbscale(xNA, wNA)) expect_equal(fscale(xNA, w = wNA), wbscale(xNA, wNA, na.rm = TRUE)) expect_equal(qM(fscale(mtcars, w = wdatNA)), fscale(m, w = wdatNA)) expect_equal(fscale(m, w = wdatNA), dapply(m, wbscale, wdatNA, na.rm = TRUE)) expect_equal(fscale(m, w = wdatNA, na.rm = FALSE), dapply(m, wbscale, wdatNA)) expect_equal(fscale(mNA, w = wdatNA, na.rm = FALSE), dapply(mNA, wbscale, wdatNA)) expect_equal(fscale(mNA, w = wdatNA), dapply(mNA, wbscale, wdatNA, na.rm = TRUE)) expect_equal(fscale(mtcars, w = wdatNA), dapply(mtcars, wbscale, wdatNA, na.rm = TRUE)) expect_equal(fscale(mtcars, w = wdatNA, na.rm = FALSE), dapply(mtcars, wbscale, wdatNA)) expect_equal(fscale(mtcNA, w = wdatNA, na.rm = FALSE), dapply(mtcNA, wbscale, wdatNA)) expect_equal(fscale(mtcNA, w = wdatNA), dapply(mtcNA, wbscale, wdatNA, na.rm = TRUE)) expect_equal(fscale(x, f, wNA), wBY(x, f, wbscale, wNA, na.rm = TRUE)) expect_equal(fscale(x, f, wNA, na.rm = FALSE), wBY(x, f, wbscale, wNA)) expect_equal(fscale(xNA, f, wNA, na.rm = FALSE), wBY(xNA, f, wbscale, wNA)) # expect_equal(fscale(xNA, f, wNA), wBY(xNA, f, wbscale, wNA, na.rm = TRUE)) # failed on release-windows-ix86+x86_64 expect_equal(replace_Inf(fscale(m, g, wdatNA), NA), wBY(m, g, wbscale, wdatNA, na.rm = TRUE)) expect_equal(fscale(m, g, wdatNA, na.rm = FALSE), wBY(m, g, wbscale, wdatNA)) expect_equal(fscale(mNA, g, wdatNA, na.rm = FALSE), wBY(mNA, g, wbscale, wdatNA)) expect_equal(replace_Inf(fscale(mNA, g, wdatNA), NA), wBY(mNA, g, wbscale, wdatNA, na.rm = TRUE)) expect_equal(replace_Inf(fscale(mtcars, g, wdatNA), NA), wBY(mtcars, g, wbscale, wdatNA, na.rm = TRUE)) expect_equal(fscale(mtcars, g, wdatNA, na.rm = FALSE), wBY(mtcars, g, wbscale, wdatNA)) expect_equal(fscale(mtcNA, g, wdatNA, na.rm = FALSE), wBY(mtcNA, g, wbscale, wdatNA)) expect_equal(replace_Inf(fscale(mtcNA, g, wdatNA), NA), wBY(mtcNA, g, wbscale, wdatNA, na.rm = TRUE)) }) wsu <- function(x, w) if(is.null(dim(x))) `attributes<-`(qsu.default(x, w = w)[2:3], NULL) else `attributes<-`(qsu(x, w = w)[,2:3], NULL) wsuby <- function(x, f, w) if(is.null(dim(x))) `attributes<-`(qsu.default(x, f, w = w)[, 2:3], NULL) else `attributes<-`(qsu(x, f, w = w)[,2:3,], NULL) test_that("Weighted customized scaling works as intended", { expect_equal(wsu(fscale(x, w = w, mean = 5.1, sd = 3.9), w = w), c(5.1, 3.9)) expect_equal(wsu(fscale(x, w = w, mean = 5.1, sd = 3.9, na.rm = FALSE), w = w), c(5.1, 3.9)) expect_equal(wsu(fscale(xNA, w = w, mean = 5.1, sd = 3.9, na.rm = FALSE), w = w), c(NaN, NA)) expect_equal(wsu(fscale(xNA, w = w, mean = 5.1, sd = 3.9), w = w), c(5.1, 3.9), w = w) expect_equal(qM(fscale(mtcars, w = wdat, mean = 5.1, sd = 3.9)), fscale(m, w = wdat, mean = 5.1, sd = 3.9)) expect_equal(wsu(fscale(m, w = wdat, mean = 5.1, sd = 3.9), w = wdat), rep(c(5.1, 3.9), c(11, 11))) expect_equal(wsu(fscale(m, w = wdat, mean = 5.1, sd = 3.9, na.rm = FALSE), w = wdat), rep(c(5.1, 3.9), c(11, 11))) expect_equal(wsu(fscale(mNA, w = wdat, mean = 5.1, sd = 3.9, na.rm = FALSE), w = wdat), rep(c(5.1, 3.9), c(11, 11))+miss) expect_equal(wsu(fscale(mNA, w = wdat, mean = 5.1, sd = 3.9), w = wdat), rep(c(5.1, 3.9), c(11, 11))) expect_equal(wsuby(fscale(x, f, w = w, mean = 5.1, sd = 3.9), f, w = w), rep(c(5.1, 3.9), rep(fnlevels(f), 2))) expect_equal(wsuby(fscale(x, f, w = w, mean = 5.1, sd = 3.9, na.rm = FALSE), f, w = w), rep(c(5.1, 3.9), rep(fnlevels(f), 2))) expect_equal(wsuby(fscale(xNA, f, w = w, mean = 5.1, sd = 3.9), f, w = w), rep(c(5.1, 3.9), rep(fnlevels(f), 2))) expect_equal(wsu(fscale(x, w = w, mean = FALSE, sd = 3.9), w = w), c(fmean(x, w = w), 3.9)) expect_equal(wsu(fscale(x, w = w, mean = FALSE, sd = 3.9, na.rm = FALSE), w = w), c(fmean(x, w = w), 3.9)) expect_equal(wsu(fscale(xNA, w = w, mean = FALSE, sd = 3.9, na.rm = FALSE), w = w), c(NaN, NA)) expect_equal(wsu(fscale(xNA, w = w, mean = FALSE, sd = 3.9), w = w), c(fmean(xNA, w = w), 3.9)) expect_equal(qM(fscale(mtcars, w = wdat, mean = FALSE, sd = 3.9)), fscale(m, w = wdat, mean = FALSE, sd = 3.9)) # ... # expect_equal(wsuby(fscale(x, f, w = w, mean = "overall.mean", sd = 3.9), f, w = w), rep(c(5.1, 3.9), rep(fnlevels(f), 2))) # expect_equal(wsuby(fscale(x, f, w = w, mean = FALSE, sd = 3.9, na.rm = FALSE), f, w = w), rep(c(5.1, 3.9), rep(fnlevels(f), 2))) # expect_equal(wsuby(fscale(xNA, f, w = w, mean = FALSE, sd = 3.9), f, w = w), rep(c(5.1, 3.9), rep(fnlevels(f), 2))) }) test_that("Weighted customized scaling performs like wbscale (defined above)", { # complete weights expect_equal(fscale(NA, w = 1, mean = 5.1, sd = 3.9), wbscale(NA, 1, mean = 5.1, sd = 3.9)) expect_equal(fscale(NA, w = 1, na.rm = FALSE, mean = 5.1, sd = 3.9), wbscale(NA, 1, mean = 5.1, sd = 3.9)) expect_equal(fscale(1, w = 1, mean = 5.1, sd = 3.9), wbscale(1, w = 1, mean = 5.1, sd = 3.9)) expect_equal(fscale(1:3, w = 1:3, mean = 5.1, sd = 3.9), wbscale(1:3, 1:3, mean = 5.1, sd = 3.9)) expect_equal(fscale(-1:1, w = 1:3, mean = 5.1, sd = 3.9), wbscale(-1:1, 1:3, mean = 5.1, sd = 3.9)) expect_equal(fscale(1, w = 1, na.rm = FALSE, mean = 5.1, sd = 3.9), wbscale(1, 1, mean = 5.1, sd = 3.9)) expect_equal(fscale(1:3, w = c(0.99,3454,1.111), na.rm = FALSE, mean = 5.1, sd = 3.9), wbscale(1:3, c(0.99,3454,1.111), mean = 5.1, sd = 3.9)) expect_equal(fscale(-1:1, w = 1:3, na.rm = FALSE, mean = 5.1, sd = 3.9), wbscale(-1:1, 1:3, mean = 5.1, sd = 3.9)) expect_equal(fscale(x, w = w, mean = 5.1, sd = 3.9), wbscale(x, w, mean = 5.1, sd = 3.9)) expect_equal(fscale(x, w = w, na.rm = FALSE, mean = 5.1, sd = 3.9), wbscale(x, w, mean = 5.1, sd = 3.9)) expect_equal(fscale(xNA, w = w, na.rm = FALSE, mean = 5.1, sd = 3.9), wbscale(xNA, w, mean = 5.1, sd = 3.9)) expect_equal(fscale(xNA, w = w, mean = 5.1, sd = 3.9), wbscale(xNA, w, na.rm = TRUE, mean = 5.1, sd = 3.9)) expect_equal(qM(fscale(mtcars, w = wdat, mean = 5.1, sd = 3.9)), fscale(m, w = wdat, mean = 5.1, sd = 3.9)) expect_equal(fscale(m, w = wdat, mean = 5.1, sd = 3.9), dapply(m, wbscale, wdat, na.rm = TRUE, mean = 5.1, sd = 3.9)) expect_equal(fscale(m, w = wdat, na.rm = FALSE, mean = 5.1, sd = 3.9), dapply(m, wbscale, wdat, mean = 5.1, sd = 3.9)) expect_equal(fscale(mNA, w = wdat, na.rm = FALSE, mean = 5.1, sd = 3.9), dapply(mNA, wbscale, wdat, mean = 5.1, sd = 3.9)) expect_equal(fscale(mNA, w = wdat, mean = 5.1, sd = 3.9), dapply(mNA, wbscale, wdat, na.rm = TRUE, mean = 5.1, sd = 3.9)) expect_equal(fscale(mtcars, w = wdat, mean = 5.1, sd = 3.9), dapply(mtcars, wbscale, wdat, na.rm = TRUE, mean = 5.1, sd = 3.9)) expect_equal(fscale(mtcars, w = wdat, na.rm = FALSE, mean = 5.1, sd = 3.9), dapply(mtcars, wbscale, wdat, mean = 5.1, sd = 3.9)) expect_equal(fscale(mtcNA, w = wdat, na.rm = FALSE, mean = 5.1, sd = 3.9), dapply(mtcNA, wbscale, wdat, mean = 5.1, sd = 3.9)) expect_equal(fscale(mtcNA, w = wdat, mean = 5.1, sd = 3.9), dapply(mtcNA, wbscale, wdat, na.rm = TRUE, mean = 5.1, sd = 3.9)) expect_equal(fscale(x, f, w, mean = 5.1, sd = 3.9), wBY(x, f, wbscale, w, mean = 5.1, sd = 3.9)) expect_equal(fscale(x, f, w, na.rm = FALSE, mean = 5.1, sd = 3.9), wBY(x, f, wbscale, w, mean = 5.1, sd = 3.9)) expect_equal(fscale(xNA, f, w, na.rm = FALSE, mean = 5.1, sd = 3.9), wBY(xNA, f, wbscale, w, mean = 5.1, sd = 3.9)) expect_equal(fscale(xNA, f, w, mean = 5.1, sd = 3.9), wBY(xNA, f, wbscale, w, na.rm = TRUE, mean = 5.1, sd = 3.9)) expect_equal(fscale(m, g, wdat, mean = 5.1, sd = 3.9), wBY(m, g, wbscale, wdat, mean = 5.1, sd = 3.9)) expect_equal(fscale(m, g, wdat, na.rm = FALSE, mean = 5.1, sd = 3.9), wBY(m, g, wbscale, wdat, mean = 5.1, sd = 3.9)) expect_equal(fscale(mNA, g, wdat, na.rm = FALSE, mean = 5.1, sd = 3.9), wBY(mNA, g, wbscale, wdat, mean = 5.1, sd = 3.9)) expect_equal(fscale(mNA, g, wdat, mean = 5.1, sd = 3.9), wBY(mNA, g, wbscale, wdat, na.rm = TRUE, mean = 5.1, sd = 3.9)) expect_equal(fscale(mtcars, g, wdat, mean = 5.1, sd = 3.9), wBY(mtcars, g, wbscale, wdat, mean = 5.1, sd = 3.9)) expect_equal(fscale(mtcars, g, wdat, na.rm = FALSE, mean = 5.1, sd = 3.9), wBY(mtcars, g, wbscale, wdat, mean = 5.1, sd = 3.9)) expect_equal(fscale(mtcNA, g, wdat, na.rm = FALSE, mean = 5.1, sd = 3.9), wBY(mtcNA, g, wbscale, wdat, mean = 5.1, sd = 3.9)) expect_equal(fscale(mtcNA, g, wdat, mean = 5.1, sd = 3.9), wBY(mtcNA, g, wbscale, wdat, na.rm = TRUE, mean = 5.1, sd = 3.9)) # missing weights expect_equal(fscale(NA, w = NA, mean = 5.1, sd = 3.9), wbscale(NA, NA, mean = 5.1, sd = 3.9)) expect_equal(fscale(NA, w = NA, na.rm = FALSE, mean = 5.1, sd = 3.9), wbscale(NA, NA, mean = 5.1, sd = 3.9)) expect_equal(fscale(1, w = NA, mean = 5.1, sd = 3.9), wbscale(1, w = NA, mean = 5.1, sd = 3.9)) expect_equal(fscale(1:3, w = c(NA,1:2), mean = 5.1, sd = 3.9), wbscale(1:3, c(NA,1:2), na.rm = TRUE, mean = 5.1, sd = 3.9)) expect_equal(fscale(-1:1, w = c(NA,1:2), mean = 5.1, sd = 3.9), wbscale(-1:1, c(NA,1:2), na.rm = TRUE, mean = 5.1, sd = 3.9)) expect_equal(fscale(1, w = NA, na.rm = FALSE, mean = 5.1, sd = 3.9), wbscale(1, NA, mean = 5.1, sd = 3.9)) expect_equal(fscale(1:3, w = c(NA,1:2), na.rm = FALSE, mean = 5.1, sd = 3.9), wbscale(1:3, c(NA,1:2), mean = 5.1, sd = 3.9)) expect_equal(fscale(-1:1, w = c(NA,1:2), na.rm = FALSE, mean = 5.1, sd = 3.9), wbscale(-1:1, c(NA,1:2), mean = 5.1, sd = 3.9)) expect_equal(fscale(x, w = wNA, mean = 5.1, sd = 3.9), wbscale(x, wNA, na.rm = TRUE, mean = 5.1, sd = 3.9)) expect_equal(fscale(x, w = wNA, na.rm = FALSE, mean = 5.1, sd = 3.9), wbscale(x, wNA, mean = 5.1, sd = 3.9)) expect_equal(fscale(xNA, w = wNA, na.rm = FALSE, mean = 5.1, sd = 3.9), wbscale(xNA, wNA, mean = 5.1, sd = 3.9)) expect_equal(fscale(xNA, w = wNA, mean = 5.1, sd = 3.9), wbscale(xNA, wNA, na.rm = TRUE, mean = 5.1, sd = 3.9)) expect_equal(qM(fscale(mtcars, w = wdatNA, mean = 5.1, sd = 3.9)), fscale(m, w = wdatNA, mean = 5.1, sd = 3.9)) expect_equal(fscale(m, w = wdatNA, mean = 5.1, sd = 3.9), dapply(m, wbscale, wdatNA, na.rm = TRUE, mean = 5.1, sd = 3.9)) expect_equal(fscale(m, w = wdatNA, na.rm = FALSE, mean = 5.1, sd = 3.9), dapply(m, wbscale, wdatNA, mean = 5.1, sd = 3.9)) expect_equal(fscale(mNA, w = wdatNA, na.rm = FALSE, mean = 5.1, sd = 3.9), dapply(mNA, wbscale, wdatNA, mean = 5.1, sd = 3.9)) expect_equal(fscale(mNA, w = wdatNA, mean = 5.1, sd = 3.9), dapply(mNA, wbscale, wdatNA, na.rm = TRUE, mean = 5.1, sd = 3.9)) expect_equal(fscale(mtcars, w = wdatNA, mean = 5.1, sd = 3.9), dapply(mtcars, wbscale, wdatNA, na.rm = TRUE, mean = 5.1, sd = 3.9)) expect_equal(fscale(mtcars, w = wdatNA, na.rm = FALSE, mean = 5.1, sd = 3.9), dapply(mtcars, wbscale, wdatNA, mean = 5.1, sd = 3.9)) expect_equal(fscale(mtcNA, w = wdatNA, na.rm = FALSE, mean = 5.1, sd = 3.9), dapply(mtcNA, wbscale, wdatNA, mean = 5.1, sd = 3.9)) expect_equal(fscale(mtcNA, w = wdatNA, mean = 5.1, sd = 3.9), dapply(mtcNA, wbscale, wdatNA, na.rm = TRUE, mean = 5.1, sd = 3.9)) expect_equal(fscale(x, f, wNA, mean = 5.1, sd = 3.9), wBY(x, f, wbscale, wNA, na.rm = TRUE, mean = 5.1, sd = 3.9)) expect_equal(fscale(x, f, wNA, na.rm = FALSE, mean = 5.1, sd = 3.9), wBY(x, f, wbscale, wNA, mean = 5.1, sd = 3.9)) expect_equal(fscale(xNA, f, wNA, na.rm = FALSE, mean = 5.1, sd = 3.9), wBY(xNA, f, wbscale, wNA, mean = 5.1, sd = 3.9)) # expect_equal(fscale(xNA, f, wNA, mean = 5.1, sd = 3.9), wBY(xNA, f, wbscale, wNA, na.rm = TRUE, mean = 5.1, sd = 3.9)) # failed on release-windows-ix86+x86_64 expect_equal(replace_Inf(fscale(m, g, wdatNA, mean = 5.1, sd = 3.9), NA), wBY(m, g, wbscale, wdatNA, na.rm = TRUE, mean = 5.1, sd = 3.9)) expect_equal(fscale(m, g, wdatNA, na.rm = FALSE, mean = 5.1, sd = 3.9), wBY(m, g, wbscale, wdatNA, mean = 5.1, sd = 3.9)) expect_equal(fscale(mNA, g, wdatNA, na.rm = FALSE, mean = 5.1, sd = 3.9), wBY(mNA, g, wbscale, wdatNA, mean = 5.1, sd = 3.9)) expect_equal(replace_Inf(fscale(mNA, g, wdatNA, mean = 5.1, sd = 3.9), NA), wBY(mNA, g, wbscale, wdatNA, na.rm = TRUE, mean = 5.1, sd = 3.9)) expect_equal(replace_Inf(fscale(mtcars, g, wdatNA, mean = 5.1, sd = 3.9), NA), wBY(mtcars, g, wbscale, wdatNA, na.rm = TRUE, mean = 5.1, sd = 3.9)) expect_equal(fscale(mtcars, g, wdatNA, na.rm = FALSE, mean = 5.1, sd = 3.9), wBY(mtcars, g, wbscale, wdatNA, mean = 5.1, sd = 3.9)) expect_equal(fscale(mtcNA, g, wdatNA, na.rm = FALSE, mean = 5.1, sd = 3.9), wBY(mtcNA, g, wbscale, wdatNA, mean = 5.1, sd = 3.9)) expect_equal(replace_Inf(fscale(mtcNA, g, wdatNA, mean = 5.1, sd = 3.9), NA), wBY(mtcNA, g, wbscale, wdatNA, na.rm = TRUE, mean = 5.1, sd = 3.9)) }) test_that("Weighted customized scaling special cases perform as intended ", { # NOTE: These tests are currently only run with complete weights. STill implement them for missing weights ... # No mean / centering expect_equal(fscale(x, w = w, mean = FALSE, sd = 3.9), wbscale(x, na.rm = TRUE, w = w, mean = fmean(x, w = w), sd = 3.9)) expect_equal(fscale(x, w = w, mean = FALSE, sd = 3.9, na.rm = FALSE), wbscale(x, w = w, mean = fmean(x, w = w), sd = 3.9)) expect_equal(fscale(xNA, w = w, mean = FALSE, sd = 3.9, na.rm = FALSE), wbscale(xNA, w = w, mean = fmean(xNA, w = w), sd = 3.9)) expect_equal(fscale(xNA, w = w, mean = FALSE, sd = 3.9), wbscale(xNA, na.rm = TRUE, w = w, mean = fmean(xNA, w = w), sd = 3.9)) expect_equal(qM(fscale(mtcars, w = wdat, mean = FALSE, sd = 3.9)), fscale(m, w = wdat, mean = FALSE, sd = 3.9)) expect_equal(fmean(fscale(mtcars, w = wdat, mean = FALSE, sd = 3.9), w = wdat), fmean(mtcars, w = wdat)) expect_equal(unname(fsd(fscale(mtcars, w = wdat, mean = FALSE, sd = 3.9), w = wdat)), rep(3.9, length(mtcars))) expect_equal(fscale(x, f, w, mean = FALSE), unlist(Map(wbscale, split(x, f), split(w, f), na.rm = TRUE), use.names = FALSE) + B(x, f, w)) expect_equal(fscale(x, f, w, mean = FALSE, na.rm = FALSE), unlist(Map(wbscale, split(x, f), split(w, f)), use.names = FALSE) + B(x, f, w)) expect_equal(fscale(xNA, f, w, mean = FALSE, na.rm = FALSE), unlist(Map(wbscale, split(xNA, f), split(w, f)), use.names = FALSE) + B(xNA, f, w)) expect_equal(fscale(xNA, f, w, mean = FALSE), unlist(Map(wbscale, split(xNA, f), split(w, f), na.rm = TRUE), use.names = FALSE) + B(xNA, f, w)) # Centering on overall mean expect_equal(fscale(x, f, w, mean = "overall.mean"), unlist(Map(wbscale, split(x, f), split(w, f), na.rm = TRUE), use.names = FALSE) + B(x, w = w)) expect_equal(fscale(x, f, w, mean = "overall.mean", na.rm = FALSE), unlist(Map(wbscale, split(x, f), split(w, f)), use.names = FALSE) + B(x, w = w)) # expect_equal(fscale(xNA, f, w, mean = "overall.mean", na.rm = FALSE), unlist(Map(wbscale, split(xNA, f), split(w, f)), use.names = FALSE) + B(xNA, w = w)) # Not the same !! expect_equal(fscale(xNA, f, w, mean = "overall.mean"), unlist(Map(wbscale, split(xNA, f), split(w, f), na.rm = TRUE), use.names = FALSE) + B(xNA, w = w)) # Scaling by within-sd expect_equal(fscale(x, f, w, sd = "within.sd"), unlist(Map(wbscale, split(x, f), split(w, f), na.rm = TRUE), use.names = FALSE) * fsd(W(x, f, w), w = w)) expect_equal(fscale(x, f, w, sd = "within.sd", na.rm = FALSE), unlist(Map(wbscale, split(x, f), split(w, f)), use.names = FALSE) * fsd(W(x, f, w), w = w)) # expect_equal(fscale(xNA, f, w, sd = "within.sd", na.rm = FALSE), unlist(Map(wbscale, split(xNA, f), split(w, f)), use.names = FALSE) * fsd(W(xNA, f, w), w = w)) # Not the same !! expect_equal(fscale(xNA, f, w, sd = "within.sd"), unlist(Map(wbscale, split(xNA, f), split(w, f), na.rm = TRUE), use.names = FALSE) * fsd(W(xNA, f, w), w = w)) # Centering on overall mean and scaling by within-sd expect_equal(fscale(x, f, w, mean = "overall.mean", sd = "within.sd"), unlist(Map(wbscale, split(x, f), split(w, f), na.rm = TRUE), use.names = FALSE) * fsd(W(x, f, w), w = w) + B(x, w = w)) expect_equal(fscale(x, f, w, mean = "overall.mean", sd = "within.sd", na.rm = FALSE), unlist(Map(wbscale, split(x, f), split(w, f)), use.names = FALSE) * fsd(W(x, f, w), w = w) + B(x, w = w)) # expect_equal(fscale(xNA, f, w, mean = "overall.mean", sd = "within.sd", na.rm = FALSE), unlist(Map(wbscale, split(xNA, f), split(w, f)), use.names = FALSE) * fsd(W(xNA, f, w), w = w) + B(xNA, w = w)) # Not the same !! expect_equal(fscale(xNA, f, w, mean = "overall.mean", sd = "within.sd"), unlist(Map(wbscale, split(xNA, f), split(w, f), na.rm = TRUE), use.names = FALSE) * fsd(W(xNA, f, w), w = w) + B(xNA, w = w)) }) test_that("fscale performs numerically stable", { expect_true(all_obj_equal(replicate(50, fscale(1), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(NA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(NA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(x), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(x, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(xNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(xNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(m), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(m, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mtcars), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mtcars, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mtcNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mtcNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(x, f), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(x, f, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(xNA, f, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(xNA, f), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(m, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(m, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mNA, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mNA, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mtcars, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mtcars, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mtcNA, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mtcNA, g), simplify = FALSE))) }) test_that("fscale customized scaling performs numerically stable", { expect_true(all_obj_equal(replicate(50, fscale(1, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(NA, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(NA, na.rm = FALSE, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(x, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(x, na.rm = FALSE, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(xNA, na.rm = FALSE, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(xNA, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(m, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(m, na.rm = FALSE, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mNA, na.rm = FALSE, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mNA, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mtcars, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mtcars, na.rm = FALSE, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mtcNA, na.rm = FALSE, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mtcNA, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(x, f, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(x, f, na.rm = FALSE, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(xNA, f, na.rm = FALSE, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(xNA, f, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(m, g, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(m, g, na.rm = FALSE, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mNA, g, na.rm = FALSE, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mNA, g, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mtcars, g, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mtcars, g, na.rm = FALSE, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mtcNA, g, na.rm = FALSE, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mtcNA, g, mean = 5.1, sd = 3.9), simplify = FALSE))) }) test_that("fscale with complete weights performs numerically stable", { expect_true(all_obj_equal(replicate(50, fscale(1, w = 1), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(NA, w = 1), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(NA, w = 1, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(x, w = w), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(x, w = w, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(xNA, w = w, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(xNA, w = w), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(m, w = wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(m, w = wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mNA, w = wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mNA, w = wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mtcars, w = wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mtcars, w = wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mtcNA, w = wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mtcNA, w = wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(x, f, w), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(x, f, w, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(xNA, f, w, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(xNA, f, w), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(m, g, wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(m, g, wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mNA, g, wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mNA, g, wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mtcars, g, wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mtcars, g, wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mtcNA, g, wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mtcNA, g, wdat), simplify = FALSE))) }) test_that("fscale customized scaling with complete weights performs numerically stable", { expect_true(all_obj_equal(replicate(50, fscale(1, w = 1, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(NA, w = 1, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(NA, w = 1, na.rm = FALSE, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(x, w = w, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(x, w = w, na.rm = FALSE, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(xNA, w = w, na.rm = FALSE, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(xNA, w = w, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(m, w = wdat, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(m, w = wdat, na.rm = FALSE, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mNA, w = wdat, na.rm = FALSE, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mNA, w = wdat, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mtcars, w = wdat, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mtcars, w = wdat, na.rm = FALSE, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mtcNA, w = wdat, na.rm = FALSE, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mtcNA, w = wdat, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(x, f, w, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(x, f, w, na.rm = FALSE, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(xNA, f, w, na.rm = FALSE, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(xNA, f, w, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(m, g, wdat, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(m, g, wdat, na.rm = FALSE, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mNA, g, wdat, na.rm = FALSE, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mNA, g, wdat, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mtcars, g, wdat, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mtcars, g, wdat, na.rm = FALSE, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mtcNA, g, wdat, na.rm = FALSE, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mtcNA, g, wdat, mean = 5.1, sd = 3.9), simplify = FALSE))) }) test_that("fscale with missing weights performs numerically stable", { expect_true(all_obj_equal(replicate(50, fscale(1, w = NA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(NA, w = NA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(NA, w = NA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(x, w = wNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(x, w = wNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(xNA, w = wNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(xNA, w = wNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(m, w = wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(m, w = wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mNA, w = wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mNA, w = wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mtcars, w = wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mtcars, w = wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mtcNA, w = wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mtcNA, w = wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(x, f, wNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(x, f, wNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(xNA, f, wNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(xNA, f, wNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(m, g, wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(m, g, wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mNA, g, wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mNA, g, wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mtcars, g, wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mtcars, g, wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mtcNA, g, wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mtcNA, g, wdatNA), simplify = FALSE))) }) test_that("fscale customized scaling with missing weights performs numerically stable", { expect_true(all_obj_equal(replicate(50, fscale(1, w = NA, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(NA, w = NA, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(NA, w = NA, na.rm = FALSE, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(x, w = wNA, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(x, w = wNA, na.rm = FALSE, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(xNA, w = wNA, na.rm = FALSE, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(xNA, w = wNA, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(m, w = wdatNA, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(m, w = wdatNA, na.rm = FALSE, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mNA, w = wdatNA, na.rm = FALSE, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mNA, w = wdatNA, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mtcars, w = wdatNA, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mtcars, w = wdatNA, na.rm = FALSE, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mtcNA, w = wdatNA, na.rm = FALSE, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mtcNA, w = wdatNA, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(x, f, wNA, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(x, f, wNA, na.rm = FALSE, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(xNA, f, wNA, na.rm = FALSE, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(xNA, f, wNA, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(m, g, wdatNA, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(m, g, wdatNA, na.rm = FALSE, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mNA, g, wdatNA, na.rm = FALSE, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mNA, g, wdatNA, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mtcars, g, wdatNA, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mtcars, g, wdatNA, na.rm = FALSE, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mtcNA, g, wdatNA, na.rm = FALSE, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mtcNA, g, wdatNA, mean = 5.1, sd = 3.9), simplify = FALSE))) }) # NOTE: fscale(c(a, a)) gives c(NaN, NaN) (sd is 0) !!! test_that("fscale handles special values in the right way", { expect_equal(fscale(NA), NA_real_) expect_equal(fscale(NaN), NA_real_) expect_equal(fscale(Inf), NA_real_) expect_equal(fscale(-Inf), NA_real_) expect_equal(fscale(TRUE), NA_real_) expect_equal(fscale(FALSE), NA_real_) expect_equal(fscale(NA, na.rm = FALSE), NA_real_) expect_equal(fscale(NaN, na.rm = FALSE), NA_real_) expect_equal(fscale(Inf, na.rm = FALSE), NA_real_) expect_equal(fscale(-Inf, na.rm = FALSE), NA_real_) expect_equal(fscale(TRUE, na.rm = FALSE), NA_real_) expect_equal(fscale(FALSE, na.rm = FALSE), NA_real_) expect_equal(fscale(c(1,NA)), c(NA_real_,NA_real_)) expect_equal(fscale(c(1,NaN)), c(NA_real_,NA_real_)) expect_equal(fscale(c(1,Inf)), c(NA_real_,NA_real_)) expect_equal(fscale(c(1,-Inf)), c(NA_real_,NA_real_)) expect_equal(fscale(c(1,Inf), na.rm = FALSE), c(NA_real_,NA_real_)) expect_equal(fscale(c(1,-Inf), na.rm = FALSE), c(NA_real_,NA_real_)) expect_equal(fscale(c(FALSE,FALSE), na.rm = FALSE), c(NaN,NaN)) expect_equal(fscale(c(1,1), na.rm = FALSE), c(NaN,NaN)) }) test_that("fscale with weights handles special values in the right way", { expect_equal(fscale(NA, w = 1), NA_real_) expect_equal(fscale(NaN, w = 1), NA_real_) expect_equal(fscale(Inf, w = 1), NA_real_) expect_equal(fscale(-Inf, w = 1), NA_real_) expect_equal(fscale(TRUE, w = 1), NA_real_) expect_equal(fscale(FALSE, w = 1), NA_real_) expect_equal(fscale(NA, w = 1, na.rm = FALSE), NA_real_) expect_equal(fscale(NaN, w = 1, na.rm = FALSE), NA_real_) expect_equal(fscale(Inf, w = 1, na.rm = FALSE), NA_real_) expect_equal(fscale(-Inf, w = 1, na.rm = FALSE), NA_real_) expect_equal(fscale(TRUE, w = 1, na.rm = FALSE), NA_real_) expect_equal(fscale(FALSE, w = 1, na.rm = FALSE), NA_real_) expect_equal(fscale(NA, w = NA), NA_real_) expect_equal(fscale(NaN, w = NA), NA_real_) expect_equal(fscale(Inf, w = NA), NA_real_) expect_equal(fscale(-Inf, w = NA), NA_real_) expect_equal(fscale(TRUE, w = NA), NA_real_) expect_equal(fscale(FALSE, w = NA), NA_real_) expect_equal(fscale(NA, w = NA, na.rm = FALSE), NA_real_) expect_equal(fscale(NaN, w = NA, na.rm = FALSE), NA_real_) expect_equal(fscale(Inf, w = NA, na.rm = FALSE), NA_real_) expect_equal(fscale(-Inf, w = NA, na.rm = FALSE), NA_real_) expect_equal(fscale(TRUE, w = NA, na.rm = FALSE), NA_real_) expect_equal(fscale(FALSE, w = NA, na.rm = FALSE), NA_real_) expect_equal(fscale(1:3, w = c(1,Inf,3)), c(NA_real_,NA_real_,NA_real_)) expect_equal(fscale(1:3, w = c(1,-Inf,3)), c(NA_real_,NA_real_,NA_real_)) expect_equal(fscale(1:3, w = c(1,Inf,3), na.rm = FALSE), c(NA_real_,NA_real_,NA_real_)) expect_equal(fscale(1:3, w = c(1,-Inf,3), na.rm = FALSE), c(NA_real_,NA_real_,NA_real_)) }) test_that("fscale produces errors for wrong input", { expect_error(fscale("a")) expect_error(fscale(NA_character_)) expect_error(fscale(mNAc)) expect_error(fscale(mNAc, f)) expect_error(fscale(1:2,1:3)) expect_error(fscale(m,1:31)) expect_error(fscale(mtcars,1:31)) expect_error(fscale(mtcars, w = 1:31)) expect_error(fscale("a", w = 1)) expect_error(fscale(1:2, w = 1:3)) expect_error(fscale(NA_character_, w = 1)) expect_error(fscale(mNAc, w = wdat)) expect_error(fscale(mNAc, f, wdat)) expect_error(fscale(mNA, w = 1:33)) expect_error(fscale(1:2,1:2, 1:3)) expect_error(fscale(m,1:32,1:20)) expect_error(fscale(mtcars,1:32,1:10)) expect_error(fscale(1:2, w = c("a","b"))) expect_error(fscale(wlddev)) expect_error(fscale(wlddev, w = wlddev$year)) expect_error(fscale(wlddev, wlddev$iso3c)) expect_error(fscale(wlddev, wlddev$iso3c, wlddev$year)) }) test_that("fscale shoots errors for wrong input to mean and sd", { expect_error(fscale(x, sd = FALSE)) expect_error(fscale(m, sd = FALSE)) expect_error(fscale(mtcars, sd = FALSE)) expect_error(fscale(x, sd = "bla")) expect_error(fscale(x, mean = "bla")) expect_error(fscale(x, sd = "within.sd")) expect_error(fscale(m, sd = "within.sd")) expect_error(fscale(mtcars, sd = "within.sd")) expect_error(fscale(x, mean = "overall.mean")) expect_error(fscale(m, mean = "overall.mean")) expect_error(fscale(mtcars, mean = "overall.mean")) expect_error(fscale(m, mean = fmean(m))) expect_error(fscale(mtcars, mean = fmean(mtcars))) expect_error(fscale(m, sd = fsd(m))) expect_error(fscale(mtcars, sd = fsd(mtcars))) }) # Testing STD: Only testing wrong inputs, especially for data.frame method. Otherwise it is identical to fscale test_that("STD produces errors for wrong input", { expect_error(STD("a")) expect_error(STD(NA_character_)) expect_error(STD(mNAc)) expect_error(STD(mNAc, f)) expect_error(STD(1:2,1:3)) expect_error(STD(m,1:31)) expect_error(STD(mtcars,1:31)) expect_error(STD(mtcars, w = 1:31)) expect_error(STD("a", w = 1)) expect_error(STD(1:2, w = c("a","b"))) expect_error(STD(1:2, w = 1:3)) expect_error(STD(NA_character_, w = 1)) expect_error(STD(mNAc, w = wdat)) expect_error(STD(mNAc, f, wdat)) expect_error(STD(mNA, w = 1:33)) expect_error(STD(mtcNA, w = 1:33)) expect_error(STD(1:2,1:2, 1:3)) expect_error(STD(m,1:32,1:20)) expect_error(STD(mtcars,1:32,1:10)) expect_error(STD(1:2, 1:3, 1:2)) expect_error(STD(m,1:31,1:32)) expect_error(STD(mtcars,1:33,1:32)) }) test_that("STD.data.frame method is foolproof", { expect_visible(STD(wlddev)) expect_visible(STD(wlddev, w = wlddev$year)) expect_visible(STD(wlddev, w = ~year)) expect_visible(STD(wlddev, wlddev$iso3c)) expect_visible(STD(wlddev, ~iso3c)) expect_visible(STD(wlddev, ~iso3c + region)) expect_visible(STD(wlddev, wlddev$iso3c, wlddev$year)) expect_visible(STD(wlddev, ~iso3c, ~year)) expect_visible(STD(wlddev, cols = 9:12)) expect_visible(STD(wlddev, w = wlddev$year, cols = 9:12)) expect_visible(STD(wlddev, w = ~year, cols = 9:12)) expect_visible(STD(wlddev, wlddev$iso3c, cols = 9:12)) expect_visible(STD(wlddev, ~iso3c, cols = 9:12)) expect_visible(STD(wlddev, wlddev$iso3c, wlddev$year, cols = 9:12)) expect_visible(STD(wlddev, ~iso3c, ~year, cols = 9:12)) expect_visible(STD(wlddev, cols = c("PCGDP","LIFEEX"))) expect_visible(STD(wlddev, w = wlddev$year, cols = c("PCGDP","LIFEEX"))) expect_visible(STD(wlddev, w = ~year, cols = c("PCGDP","LIFEEX"))) expect_visible(STD(wlddev, wlddev$iso3c, cols = c("PCGDP","LIFEEX"))) expect_visible(STD(wlddev, ~iso3c, cols = c("PCGDP","LIFEEX"))) expect_visible(STD(wlddev, wlddev$iso3c, wlddev$year, cols = c("PCGDP","LIFEEX"))) expect_visible(STD(wlddev, ~iso3c, ~year, cols = c("PCGDP","LIFEEX"))) expect_error(STD(wlddev, cols = NULL)) expect_error(STD(wlddev, w = wlddev$year, cols = NULL)) expect_error(STD(wlddev, w = ~year, cols = NULL)) expect_error(STD(wlddev, wlddev$iso3c, cols = NULL)) expect_error(STD(wlddev, ~iso3c, cols = NULL)) expect_error(STD(wlddev, wlddev$iso3c, wlddev$year, cols = NULL)) expect_error(STD(wlddev, ~iso3c, ~year, cols = NULL)) expect_error(STD(wlddev, cols = 9:14)) expect_error(STD(wlddev, w = wlddev$year, cols = 9:14)) expect_error(STD(wlddev, w = ~year, cols = 9:14)) expect_error(STD(wlddev, wlddev$iso3c, cols = 9:14)) expect_error(STD(wlddev, ~iso3c, cols = 9:14)) expect_error(STD(wlddev, wlddev$iso3c, wlddev$year, cols = 9:14)) expect_error(STD(wlddev, ~iso3c, ~year, cols = 9:14)) expect_error(STD(wlddev, cols = c("PCGDP","LIFEEX","bla"))) expect_error(STD(wlddev, w = wlddev$year, cols = c("PCGDP","LIFEEX","bla"))) expect_error(STD(wlddev, w = ~year, cols = c("PCGDP","LIFEEX","bla"))) expect_error(STD(wlddev, wlddev$iso3c, cols = c("PCGDP","LIFEEX","bla"))) expect_error(STD(wlddev, ~iso3c, cols = c("PCGDP","LIFEEX","bla"))) expect_error(STD(wlddev, wlddev$iso3c, wlddev$year, cols = c("PCGDP","LIFEEX","bla"))) expect_error(STD(wlddev, ~iso3c, ~year, cols = c("PCGDP","LIFEEX","bla"))) expect_error(STD(wlddev, w = mtcars)) expect_error(STD(wlddev, w = 4)) expect_error(STD(wlddev, w = "year")) expect_error(STD(wlddev, w = ~year2)) suppressWarnings(expect_error(STD(wlddev, w = ~year + region))) expect_error(STD(wlddev, mtcars)) expect_error(STD(wlddev, 2)) expect_error(STD(wlddev, "iso3c")) expect_error(STD(wlddev, ~iso3c2)) expect_error(STD(wlddev, ~iso3c + bla)) expect_error(STD(wlddev, mtcars$mpg, mtcars$cyl)) expect_error(STD(wlddev, 2, 4)) expect_error(STD(wlddev, ~iso3c2, ~year2)) expect_error(STD(wlddev, cols = ~bla)) expect_error(STD(wlddev, w = ~bla, cols = 9:12)) expect_error(STD(wlddev, w = 4, cols = 9:12)) expect_error(STD(wlddev, w = "year", cols = 9:12)) expect_error(STD(wlddev, w = ~yewar, cols = 9:12)) expect_error(STD(wlddev, mtcars$mpg, cols = 9:12)) expect_error(STD(wlddev, ~iso3c + ss, cols = 9:12)) expect_error(STD(wlddev, 2, cols = 9:12)) expect_error(STD(wlddev, "iso3c", cols = 9:12)) expect_error(STD(wlddev, wlddev$iso3c, ~year + bla, cols = 9:12)) expect_error(STD(wlddev, ~iso3c3, ~year, cols = 9:12)) expect_error(STD(wlddev, cols = c("PC3GDP","LIFEEX"))) }) collapse/tests/testthat/test-TRA.R0000644000176200001440000002001714167351215016611 0ustar liggesuserscontext("TRA") bmean <- base::mean # rm(list = ls()) set.seed(101) d <- na_insert(iris[1:4]) v <- d$Sepal.Length m <- as.matrix(d) f <- iris$Species # For sweep replace <- function(x, y) `[<-`(y, is.na(x), value = NA) # `[<-`(x, !is.na(x), value = y) replace_fill <- function(x, y) y # rep(y, length(x)) "%" <- function(x, y) x * (100 / y) "-%%" <- function(x, y) x - (x %% y) # "-+" <- function(x, y) x - y + bmean(x, na.rm = TRUE) test_that("TRA performs like sweep", { ops <- c("replace_fill", "replace", "-", "+", "*", "/", "%", "%%", "-%%") for(i in ops) { expect_equal(drop(sweep(qM(v), 2L, bmean(v, na.rm = TRUE), i)), TRA(v, bmean(v, na.rm = TRUE), i)) expect_equal(`attributes<-`(sweep(qM(m), 2L, colMeans(m, na.rm = TRUE), i), attributes(m)), TRA(m, colMeans(m, na.rm = TRUE), i)) expect_equal(setNames(qDF(sweep(d, 2L, colMeans(qM(d), na.rm = TRUE), i)), names(d)), TRA(d, colMeans(qM(d), na.rm = TRUE), i)) } for(i in ops) { expect_equal(unlist(Map(function(x, y) drop(sweep(qM(x), 2L, y, i)), rsplit(v, f), as.list(fmean(v, f))), use.names = FALSE), TRA(v, fmean(v, f), i, f)) expect_equal(`attributes<-`(do.call(rbind, Map(function(x, y) sweep(qM(x), 2L, y, i), lapply(rsplit(qDF(m), f), qM), mrtl(fmean(m, f)))), attributes(m)), TRA(m, fmean(m, f), i, f)) expect_equal(`attributes<-`(unlist2d(Map(function(x, y) sweep(x, 2L, y, i), rsplit(d, f), mrtl(qM(fmean(d, f)))), idcols = FALSE), attributes(d)), TRA(d, fmean(d, f), i, f)) } }) test_that("TRA performs like built-in version", { for(i in seq_len(10)[-4]) { expect_equal(TRA(v, fmean(v), i), fmean(v, TRA = i)) expect_equal(TRA(m, fmean(m), i), fmean(m, TRA = i)) expect_equal(TRA(d, fmean(d), i), fmean(d, TRA = i)) } for(i in seq_len(10)) { expect_equal(TRA(v, fmean(v, f), i, f), fmean(v, f, TRA = i)) expect_equal(TRA(m, fmean(m, f), i, f), fmean(m, f, TRA = i)) expect_equal(TRA(d, fmean(d, f), i, f), fmean(d, f, TRA = i)) } }) test_that("TRA performs like fbetween and fwithin", { expect_equal(TRA(v, fmean(v), 1L), fbetween(v, fill = TRUE)) expect_equal(TRA(v, fmean(v), 2L), fbetween(v)) expect_equal(TRA(v, fmean(v), 3L), fwithin(v)) expect_equal(TRA(m, fmean(m), 1L), fbetween(m, fill = TRUE)) expect_equal(TRA(m, fmean(m), 2L), fbetween(m)) expect_equal(TRA(m, fmean(m), 3L), fwithin(m)) expect_equal(TRA(d, fmean(d), 1L), fbetween(d, fill = TRUE)) expect_equal(TRA(d, fmean(d), 2L), fbetween(d)) expect_equal(TRA(d, fmean(d), 3L), fwithin(d)) expect_equal(TRA(v, fmean(v, f), 1L, f), fbetween(v, f, fill = TRUE)) expect_equal(TRA(v, fmean(v, f), 2L, f), fbetween(v, f)) expect_equal(TRA(v, fmean(v, f), 3L, f), fwithin(v, f)) expect_equal(TRA(v, fmean(v, f), 4L, f), fwithin(v, f, mean = "overall.mean")) expect_equal(TRA(m, fmean(m, f), 1L, f), fbetween(m, f, fill = TRUE)) expect_equal(TRA(m, fmean(m, f), 2L, f), fbetween(m, f)) expect_equal(TRA(m, fmean(m, f), 3L, f), fwithin(m, f)) expect_equal(TRA(m, fmean(m, f), 4L, f), fwithin(m, f, mean = "overall.mean")) expect_equal(TRA(d, fmean(d, f), 1L, f), fbetween(d, f, fill = TRUE)) expect_equal(TRA(d, fmean(d, f), 2L, f), fbetween(d, f)) expect_equal(TRA(d, fmean(d, f), 3L, f), fwithin(d, f)) expect_equal(TRA(d, fmean(d, f), 4L, f), fwithin(d, f, mean = "overall.mean")) }) test_that("TRA gives errors for wrong input", { expect_warning(TRA(v, fmean(v), bla = 1)) expect_warning(TRA(m, fmean(m), bla = 1)) expect_warning(TRA(d, fmean(d), bla = 1)) expect_error(TRA(v, 1:2)) expect_error(TRA(m, 1:2)) expect_error(TRA(d, 1:2)) expect_error(TRA(v, as.character(fmean(v)))) expect_error(TRA(m, as.character(fmean(m)))) expect_error(TRA(d, as.character(fmean(d)))) expect_error(TRA(v, fmean(v, f), "-", f[-1])) expect_error(TRA(m, fmean(m, f), "-", f[-1])) expect_error(TRA(d, fmean(d, f), "-", f[-1])) expect_error(TRA(v, fmean(v), 19L)) expect_error(TRA(m, fmean(m), 19L)) expect_error(TRA(d, fmean(d), 19L)) expect_error(TRA(v, fmean(v), "bla")) expect_error(TRA(m, fmean(m), "bla")) expect_error(TRA(d, fmean(d), "bla")) }) test_that("TRA handles different data types as intended", { # Vector & Matrix: Simple expect_true(is.integer(fnobs(letters, TRA = "replace_fill"))) expect_true(is.integer(fnobs(letters, TRA = "replace"))) for(i in c("-", "+", "*", "/", "%", "%%", "-%%")) expect_error(fnobs(letters, TRA = i)) expect_true(is.integer(fnobs(AirPassengers, TRA = "replace_fill"))) expect_true(is.integer(fnobs(AirPassengers, TRA = "replace"))) for(i in c("-", "+", "*", "/", "%", "%%", "-%%")) expect_true(is.numeric(fnobs(AirPassengers, TRA = i))) expect_true(is.integer(fnobs(EuStockMarkets, TRA = "replace_fill"))) expect_true(is.integer(fnobs(EuStockMarkets, TRA = "replace"))) for(i in c("-", "+", "*", "/", "%", "%%", "-%%")) expect_true(is.numeric(fnobs(EuStockMarkets, TRA = i))) # Vector & Matrix: Grouped set.seed(101) expect_true(is.integer(fnobs(letters, sample.int(3, length(letters), TRUE), TRA = "replace_fill"))) expect_true(is.integer(fnobs(letters, sample.int(3, length(letters), TRUE), TRA = "replace"))) for(i in c("-", "-+", "+", "*", "/", "%", "%%", "-%%")) expect_error(fnobs(letters, sample.int(3, length(letters), TRUE), TRA = i)) expect_true(is.integer(fnobs(AirPassengers, sample.int(3, length(AirPassengers), TRUE), TRA = "replace_fill"))) expect_true(is.integer(fnobs(AirPassengers, sample.int(3, length(AirPassengers), TRUE), TRA = "replace"))) for(i in c("-", "-+", "+", "*", "/", "%", "%%", "-%%")) expect_true(is.numeric(fnobs(AirPassengers, sample.int(3, length(AirPassengers), TRUE), TRA = i))) expect_true(is.integer(fnobs(EuStockMarkets, sample.int(3, nrow(EuStockMarkets), TRUE), TRA = "replace_fill"))) expect_true(is.integer(fnobs(EuStockMarkets, sample.int(3, nrow(EuStockMarkets), TRUE), TRA = "replace"))) for(i in c("-", "-+", "+", "*", "/", "%", "%%", "-%%")) expect_true(is.numeric(fnobs(EuStockMarkets, sample.int(3, nrow(EuStockMarkets), TRUE), TRA = i))) # Date Frame: Simple expect_equal(unname(vtypes(fndistinct(wlddev, TRA = "replace"))), rep("integer", 13)) expect_equal(unname(vtypes(fndistinct(wlddev, TRA = "replace_fill"))), rep("integer", 13)) expect_equal(vtypes(fmode(wlddev, TRA = "replace_fill")), vtypes(wlddev)) expect_equal(vtypes(ffirst(wlddev, TRA = "replace_fill")), vtypes(wlddev)) expect_equal(vtypes(flast(wlddev, TRA = "replace_fill")), vtypes(wlddev)) expect_equal(vtypes(fmode(wlddev, TRA = "replace")), vtypes(wlddev)) expect_equal(vtypes(ffirst(wlddev, TRA = "replace")), vtypes(wlddev)) expect_equal(vtypes(flast(wlddev, TRA = "replace")), vtypes(wlddev)) for(i in c("-", "+", "*", "/", "%", "%%", "-%%")) expect_equal(unname(vtypes(fnobs(nv(wlddev), TRA = i))), rep("double", 7)) for(i in c("-", "+", "*", "/", "%", "%%", "-%%")) expect_error(fnobs(wlddev, TRA = i)) # Date Frame: Grouped expect_equal(unname(vtypes(fndistinct(wlddev, wlddev$iso3c, TRA = "replace"))), rep("integer", 13)) expect_equal(unname(vtypes(fndistinct(wlddev, wlddev$iso3c, TRA = "replace_fill"))), rep("integer", 13)) expect_equal(vtypes(fmode(wlddev, wlddev$iso3c, TRA = "replace_fill")), vtypes(wlddev)) expect_equal(vtypes(ffirst(wlddev, wlddev$iso3c, TRA = "replace_fill")), vtypes(wlddev)) expect_equal(vtypes(flast(wlddev, wlddev$iso3c, TRA = "replace_fill")), vtypes(wlddev)) expect_equal(vtypes(fmode(wlddev, wlddev$iso3c, TRA = "replace")), vtypes(wlddev)) expect_equal(vtypes(ffirst(wlddev, wlddev$iso3c, TRA = "replace")), vtypes(wlddev)) expect_equal(vtypes(flast(wlddev, wlddev$iso3c, TRA = "replace")), vtypes(wlddev)) for(i in c("-", "-+", "+", "*", "/", "%", "%%", "-%%")) expect_equal(unname(vtypes(fnobs(nv(wlddev), wlddev$iso3c, TRA = i))), rep("double", 7)) for(i in c("-", "-+", "+", "*", "/", "%", "%%", "-%%")) expect_error(fnobs(wlddev, wlddev$iso3c, TRA = i)) }) collapse/tests/testthat/test-ffirst-flast.R0000644000176200001440000003560614062441127020575 0ustar liggesuserscontext("ffirst and flast") # TODO: Check matrix with list columns !! # Benchmark with groups: Bettr to check missing x ??? # rm(list = ls()) set.seed(101) x <- rnorm(100) w <- abs(100 * rnorm(100)) xNA <- x wNA <- w xNA[sample.int(100, 20)] <- NA wNA[sample.int(100, 20)] <- NA f <- as.factor(sample.int(10, 100, TRUE)) data <- wlddev[wlddev$iso3c %in% c("BLZ","IND","USA","SRB","GRL"), ] l <- nrow(data) g <- GRP(droplevels(data$iso3c)) dataNA <- na_insert(data) m <- as.matrix(data) mNA <- as.matrix(dataNA) data$LC <- as.list(data$PCGDP) dataNA$LC <- lapply(na_insert(data["LC"])[[1]], function(x) if(is.na(x)) NULL else x) basefirst <- function(x, na.rm = FALSE) { if(is.list(x)) return(if(na.rm) x[which(lengths(x) > 0L)[1L]] else x[1L]) if(na.rm) x[which(!is.na(x))[1L]] else x[1L] } baselast <- function(x, na.rm = FALSE) { lst <- function(x) x[length(x)] if(is.list(x)) return(if(na.rm) x[lst(which(lengths(x) > 0L))] else lst(x)) if(na.rm && !all(na <- is.na(x))) x[lst(which(!na))] else lst(x) } # ffirst test_that("ffirst performs like basefirst (defined above)", { expect_equal(ffirst(NA), basefirst(NA)) expect_equal(ffirst(NA, na.rm = FALSE), basefirst(NA)) expect_equal(ffirst(1), basefirst(1, na.rm = TRUE)) expect_equal(ffirst(1:3), basefirst(1:3, na.rm = TRUE)) expect_equal(ffirst(-1:1), basefirst(-1:1, na.rm = TRUE)) expect_equal(ffirst(1, na.rm = FALSE), basefirst(1)) expect_equal(ffirst(1:3, na.rm = FALSE), basefirst(1:3)) expect_equal(ffirst(-1:1, na.rm = FALSE), basefirst(-1:1)) expect_equal(ffirst(x), basefirst(x, na.rm = TRUE)) expect_equal(ffirst(x, na.rm = FALSE), basefirst(x)) expect_equal(ffirst(m[, 1]), basefirst(m[, 1])) expect_equal(ffirst(xNA, na.rm = FALSE), basefirst(xNA)) expect_equal(ffirst(xNA), basefirst(xNA, na.rm = TRUE)) expect_equal(ffirst(mNA[, 1]), basefirst(mNA[, 1], na.rm = TRUE)) expect_equal(ffirst(m), dapply(m, basefirst, na.rm = TRUE)) expect_equal(ffirst(m, na.rm = FALSE), dapply(m, basefirst)) expect_equal(ffirst(mNA, na.rm = FALSE), dapply(mNA, basefirst)) expect_equal(ffirst(mNA), dapply(mNA, basefirst, na.rm = TRUE)) expect_equal(ffirst(data, drop = FALSE), dapply(data, basefirst, na.rm = TRUE, drop = FALSE)) expect_equal(ffirst(data, na.rm = FALSE, drop = FALSE), dapply(data, basefirst, drop = FALSE)) expect_equal(ffirst(dataNA, na.rm = FALSE, drop = FALSE), dapply(dataNA, basefirst, drop = FALSE)) expect_equal(ffirst(dataNA, drop = FALSE), dapply(dataNA, basefirst, na.rm = TRUE, drop = FALSE)) expect_equal(ffirst(x, f), BY(x, f, basefirst, na.rm = TRUE)) expect_equal(ffirst(x, f, na.rm = FALSE), BY(x, f, basefirst)) expect_equal(ffirst(xNA, f, na.rm = FALSE), BY(xNA, f, basefirst)) expect_equal(ffirst(xNA, f), BY(xNA, f, basefirst, na.rm = TRUE)) expect_equal(ffirst(m, na.rm = FALSE), m[1L, ]) expect_equal(ffirst(m, na.rm = FALSE, drop = FALSE), setRownames(m[1L, , drop = FALSE], NULL)) expect_equal(ffirst(m, g), BY(setRownames(m, NULL), g, basefirst, na.rm = TRUE)) expect_equal(ffirst(m, g, na.rm = FALSE), BY(setRownames(m, NULL), g, basefirst)) expect_equal(ffirst(mNA, g, na.rm = FALSE), BY(setRownames(mNA, NULL), g, basefirst)) expect_equal(ffirst(mNA, g), BY(setRownames(mNA, NULL), g, basefirst, na.rm = TRUE)) expect_equal(ffirst(data, na.rm = FALSE, drop = FALSE), setRownames(data[1L, ])) expect_equal(ffirst(data, g, use.g.names = FALSE), BY(data, g, basefirst, na.rm = TRUE, use.g.names = FALSE)) expect_equal(setRownames(ffirst(data, g, na.rm = FALSE)), BY(data, g, basefirst, use.g.names = FALSE)) expect_equal(setRownames(ffirst(dataNA, g, na.rm = FALSE)), BY(dataNA, g, basefirst, use.g.names = FALSE)) expect_equal(ffirst(dataNA, g, use.g.names = FALSE), BY(dataNA, g, basefirst, na.rm = TRUE, use.g.names = FALSE)) }) test_that("ffirst performs numerically stable", { expect_true(all_obj_equal(replicate(50, ffirst(1), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, ffirst(NA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, ffirst(NA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, ffirst(x), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, ffirst(x, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, ffirst(xNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, ffirst(xNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, ffirst(m), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, ffirst(m, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, ffirst(mNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, ffirst(mNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, ffirst(data), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, ffirst(data, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, ffirst(dataNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, ffirst(dataNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, ffirst(x, f), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, ffirst(x, f, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, ffirst(xNA, f, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, ffirst(xNA, f), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, ffirst(m, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, ffirst(m, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, ffirst(mNA, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, ffirst(mNA, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, ffirst(data, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, ffirst(data, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, ffirst(dataNA, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, ffirst(dataNA, g), simplify = FALSE))) }) test_that("ffirst handles special values in the right way", { expect_equal(ffirst(NA), NA) expect_equal(ffirst(NaN), NaN) expect_equal(ffirst(Inf), Inf) expect_equal(ffirst(-Inf), -Inf) expect_equal(ffirst(TRUE), TRUE) expect_equal(ffirst(FALSE), FALSE) expect_equal(ffirst(NA, na.rm = FALSE), NA) expect_equal(ffirst(NaN, na.rm = FALSE), NaN) expect_equal(ffirst(Inf, na.rm = FALSE), Inf) expect_equal(ffirst(-Inf, na.rm = FALSE), -Inf) expect_equal(ffirst(TRUE, na.rm = FALSE), TRUE) expect_equal(ffirst(FALSE, na.rm = FALSE), FALSE) expect_equal(ffirst(c(1,NA)), 1) expect_equal(ffirst(c(1,NaN)), 1) expect_equal(ffirst(c(1,Inf)), 1) expect_equal(ffirst(c(1,-Inf)), 1) expect_equal(ffirst(c(FALSE,TRUE)), FALSE) expect_equal(ffirst(c(TRUE,FALSE)), TRUE) expect_equal(ffirst(c(1,Inf), na.rm = FALSE), 1) expect_equal(ffirst(c(1,-Inf), na.rm = FALSE), 1) expect_equal(ffirst(c(FALSE,TRUE), na.rm = FALSE), FALSE) expect_equal(ffirst(c(TRUE,FALSE), na.rm = FALSE), TRUE) }) test_that("ffirst produces errors for wrong input", { expect_visible(ffirst("a")) expect_visible(ffirst(NA_character_)) expect_visible(ffirst(mNA)) expect_error(ffirst(mNA, f)) expect_error(ffirst(1:2,1:3)) expect_error(ffirst(m,1:31)) expect_error(ffirst(data,1:31)) expect_warning(ffirst("a", w = 1)) expect_warning(ffirst(1:2, w = 1:3)) expect_warning(ffirst(NA_character_, w = 1)) expect_warning(ffirst(mNA, w = wdat)) expect_error(ffirst(mNA, f, 2)) expect_warning(ffirst(mNA, w = 1:33)) expect_error(ffirst(1:2,1:2, 1:3)) expect_error(ffirst(m,1:32,1:20)) expect_error(ffirst(data,1:32,1:10)) expect_warning(ffirst(1:2, w = c("a","b"))) expect_visible(ffirst(wlddev)) expect_warning(ffirst(wlddev, w = wlddev$year, drop = FALSE)) expect_visible(ffirst(wlddev, wlddev$iso3c)) expect_error(ffirst(wlddev, wlddev$iso3c, wlddev$year)) }) # flast test_that("flast performs like baselast (defined above)", { expect_equal(flast(NA), baselast(NA)) expect_equal(flast(NA, na.rm = FALSE), baselast(NA)) expect_equal(flast(1), baselast(1, na.rm = TRUE)) expect_equal(flast(1:3), baselast(1:3, na.rm = TRUE)) expect_equal(flast(-1:1), baselast(-1:1, na.rm = TRUE)) expect_equal(flast(1, na.rm = FALSE), baselast(1)) expect_equal(flast(1:3, na.rm = FALSE), baselast(1:3)) expect_equal(flast(-1:1, na.rm = FALSE), baselast(-1:1)) expect_equal(flast(x), baselast(x, na.rm = TRUE)) expect_equal(flast(x, na.rm = FALSE), baselast(x)) expect_equal(flast(m[, 1]), baselast(m[, 1])) expect_equal(flast(xNA, na.rm = FALSE), baselast(xNA)) expect_equal(flast(xNA), baselast(xNA, na.rm = TRUE)) expect_equal(flast(mNA[, 1]), baselast(mNA[, 1], na.rm = TRUE)) expect_equal(flast(m), dapply(m, baselast, na.rm = TRUE)) expect_equal(flast(m, na.rm = FALSE), dapply(m, baselast)) expect_equal(flast(mNA, na.rm = FALSE), dapply(mNA, baselast)) expect_equal(flast(mNA), dapply(mNA, baselast, na.rm = TRUE)) expect_equal(flast(data, drop = FALSE), dapply(data, baselast, na.rm = TRUE, drop = FALSE)) expect_equal(flast(data, na.rm = FALSE, drop = FALSE), dapply(data, baselast, drop = FALSE)) expect_equal(flast(dataNA, na.rm = FALSE, drop = FALSE), dapply(dataNA, baselast, drop = FALSE)) expect_equal(flast(dataNA, drop = FALSE), dapply(dataNA, baselast, na.rm = TRUE, drop = FALSE)) expect_equal(flast(x, f), BY(x, f, baselast, na.rm = TRUE)) expect_equal(flast(x, f, na.rm = FALSE), BY(x, f, baselast)) expect_equal(flast(xNA, f, na.rm = FALSE), BY(xNA, f, baselast)) expect_equal(flast(xNA, f), BY(xNA, f, baselast, na.rm = TRUE)) expect_equal(flast(m, na.rm = FALSE), m[nrow(m), ]) expect_equal(flast(m, na.rm = FALSE, drop = FALSE), setRownames(m[nrow(m), , drop = FALSE], NULL)) expect_equal(flast(m, g), BY(setRownames(m, NULL), g, baselast, na.rm = TRUE)) expect_equal(flast(m, g, na.rm = FALSE), BY(setRownames(m, NULL), g, baselast)) expect_equal(flast(mNA, g, na.rm = FALSE), BY(setRownames(mNA, NULL), g, baselast)) expect_equal(flast(mNA, g), BY(setRownames(mNA, NULL), g, baselast, na.rm = TRUE)) expect_equal(flast(data, na.rm = FALSE, drop = FALSE), setRownames(data[nrow(data), ])) expect_equal(flast(data, g, use.g.names = FALSE), BY(data, g, baselast, na.rm = TRUE, use.g.names = FALSE)) expect_equal(setRownames(flast(data, g, na.rm = FALSE, use.g.names = FALSE)), BY(data, g, baselast, use.g.names = FALSE)) expect_equal(setRownames(flast(dataNA, g, na.rm = FALSE, use.g.names = FALSE)), BY(dataNA, g, baselast, use.g.names = FALSE)) expect_equal(flast(dataNA, g, use.g.names = FALSE), BY(dataNA, g, baselast, na.rm = TRUE, use.g.names = FALSE)) }) test_that("flast performs numerically stable", { expect_true(all_obj_equal(replicate(50, flast(1), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, flast(NA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, flast(NA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, flast(x), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, flast(x, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, flast(xNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, flast(xNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, flast(m), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, flast(m, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, flast(mNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, flast(mNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, flast(data), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, flast(data, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, flast(dataNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, flast(dataNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, flast(x, f), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, flast(x, f, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, flast(xNA, f, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, flast(xNA, f), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, flast(m, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, flast(m, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, flast(mNA, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, flast(mNA, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, flast(data, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, flast(data, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, flast(dataNA, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, flast(dataNA, g), simplify = FALSE))) }) test_that("flast handles special values in the right way", { expect_equal(flast(NA), NA) expect_equal(flast(NaN), NaN) expect_equal(flast(Inf), Inf) expect_equal(flast(-Inf), -Inf) expect_equal(flast(TRUE), TRUE) expect_equal(flast(FALSE), FALSE) expect_equal(flast(NA, na.rm = FALSE), NA) expect_equal(flast(NaN, na.rm = FALSE), NaN) expect_equal(flast(Inf, na.rm = FALSE), Inf) expect_equal(flast(-Inf, na.rm = FALSE), -Inf) expect_equal(flast(TRUE, na.rm = FALSE), TRUE) expect_equal(flast(FALSE, na.rm = FALSE), FALSE) expect_equal(flast(c(1,NA)), 1) expect_equal(flast(c(1,NaN)), 1) expect_equal(flast(c(1,Inf)), Inf) expect_equal(flast(c(1,-Inf)), -Inf) expect_equal(flast(c(FALSE,TRUE)), TRUE) expect_equal(flast(c(TRUE,FALSE)), FALSE) expect_equal(flast(c(1,Inf), na.rm = FALSE), Inf) expect_equal(flast(c(1,-Inf), na.rm = FALSE), -Inf) expect_equal(flast(c(FALSE,TRUE), na.rm = FALSE), TRUE) expect_equal(flast(c(TRUE,FALSE), na.rm = FALSE), FALSE) }) test_that("flast produces errors for wrong input", { expect_visible(flast("a")) expect_visible(flast(NA_character_)) expect_visible(flast(mNA)) expect_error(flast(mNA, f)) expect_error(flast(1:2,1:3)) expect_error(flast(m,1:31)) expect_error(flast(data,1:31)) expect_warning(flast("a", w = 1)) expect_warning(flast(1:2, w = 1:3)) expect_warning(flast(NA_character_, w = 1)) expect_warning(flast(mNA, w = wdat)) expect_error(flast(mNA, f, wdat)) expect_warning(flast(mNA, w = 1:33)) expect_error(flast(1:2,1:2, 1:3)) expect_error(flast(m,1:32,1:20)) expect_error(flast(data,1:32,1:10)) expect_warning(flast(1:2, w = c("a","b"))) expect_visible(flast(wlddev)) expect_warning(flast(wlddev, w = wlddev$year, drop = FALSE)) expect_visible(flast(wlddev, wlddev$iso3c)) expect_error(flast(wlddev, wlddev$iso3c, wlddev$year)) }) collapse/tests/testthat/test-fbetween-fwithin-B-W.R0000644000176200001440000021265314167352531022026 0ustar liggesuserscontext("fbetween / B and fwithin / W") # rm(list = ls()) set.seed(101) x <- rnorm(100) w <- abs(100*rnorm(100)) wdat <- abs(100*rnorm(32)) xNA <- x wNA <- w wdatNA <- wdat xNA[sample.int(100,20)] <- NA wNA[sample.int(100,20)] <- NA wdatNA[sample.int(32, 5)] <- NA f <- as.factor(rep(1:10, each = 10)) g <- as.factor(rep(c(1,2,2,3,3,3,4,4,4,4,5,5,5,5,5,7,7,7,7,7,7,7,10,10,10,10,10,10,10,10,10,10))) mtcNA <- na_insert(mtcars) mtcNA[1,1] <- NA # single group NA !! m <- as.matrix(mtcars) mNA <- as.matrix(mtcNA) mNAc <- mNA storage.mode(mNAc) <- "character" # x = rnorm(1e7) # xNA = x # xNA[sample.int(1e7,1e6)] <- NA # w = abs(100*rnorm(1e7)) # wNA = w # wNA[sample.int(1e7,1e6)] <- NA # microbenchmark(fwithin(xNA), fbetween(xNA), fbetween(xNA, w = w), fwithin(xNA, w = w), fbetween(xNA, w = wNA), fwithin(xNA, w = wNA)) # Unit: milliseconds # expr min lq mean median uq max neval cld # fwithin(xNA) 59.89809 61.45215 81.20188 63.21997 65.99563 303.5464 100 a # fbetween(xNA) 71.32829 73.00953 86.06850 74.51227 77.79108 275.6274 100 ab # fbetween(xNA, w = w) 81.95167 84.85050 106.61714 86.65870 90.92104 314.8245 100 cd # fwithin(xNA, w = w) 71.24841 73.72264 88.08572 75.32935 80.46232 279.5597 100 a c # fbetween(xNA, w = wNA) 90.99712 93.71455 107.38818 95.91545 98.16989 328.8951 100 d # fwithin(xNA, w = wNA) 80.13678 83.62511 103.55614 86.22361 93.18352 301.7070 100 bcd bsum <- base::sum between <- function(x, na.rm = FALSE) { if(!na.rm) return(ave(x)) cc <- !is.na(x) x[cc] <- ave(x[cc]) return(x) } within <- function(x, na.rm = FALSE, mean = 0) { if(!na.rm) return(x - ave(x) + mean) cc <- !is.na(x) m <- bsum(x[cc]) / bsum(cc) return(x - m + mean) } # NOTE: This is what fbetween and fwithin currently do: If missing values, compute weighted mean on available obs, and center x using it. But don't insert additional missing values in x for missing weights .. wbetween <- function(x, w, na.rm = FALSE) { if(na.rm) { xcc <- !is.na(x) cc <- xcc & !is.na(w) w <- w[cc] wm <- bsum(w * x[cc]) / bsum(w) x[xcc] <- rep(wm, bsum(xcc)) return(x) } else { wm <- bsum(w * x) / bsum(w) return(rep(wm, length(x))) } } wwithin <- function(x, w, na.rm = FALSE, mean = 0) { if(na.rm) { cc <- complete.cases(x, w) w <- w[cc] wm <- bsum(w * x[cc]) / bsum(w) } else wm <- bsum(w * x) / bsum(w) return(x - wm + mean) } wBY <- function(x, f, FUN, w, ...) { if(is.atomic(x) && !is.array(x)) return(unlist(Map(FUN, split(x, f), split(w, f), ...), use.names = FALSE)) wspl <- split(w, f) if(is.atomic(x)) return(dapply(x, function(xi) unlist(Map(FUN, split(xi, f), wspl, ...), use.names = FALSE))) qDF(dapply(x, function(xi) unlist(Map(FUN, split(xi, f), wspl, ...), use.names = FALSE), return = "matrix")) } rn <- function(x) setRownames(x, NULL) # fbetween test_that("fbetween performs like between", { expect_equal(fbetween(NA), as.double(between(NA))) expect_equal(fbetween(NA, na.rm = FALSE), as.double(between(NA))) expect_equal(fbetween(1), between(1, na.rm = TRUE)) expect_equal(fbetween(1:3), between(1:3, na.rm = TRUE)) expect_equal(fbetween(-1:1), between(-1:1, na.rm = TRUE)) expect_equal(fbetween(1, na.rm = FALSE), between(1)) expect_equal(fbetween(1:3, na.rm = FALSE), between(1:3)) expect_equal(fbetween(-1:1, na.rm = FALSE), between(-1:1)) expect_equal(fbetween(x), between(x, na.rm = TRUE)) expect_equal(fbetween(x, na.rm = FALSE), between(x)) expect_equal(fbetween(xNA, na.rm = FALSE), between(xNA)) expect_equal(fbetween(xNA), between(xNA, na.rm = TRUE)) expect_equal(qM(fbetween(mtcars)), fbetween(m)) expect_equal(fbetween(m), dapply(m, between, na.rm = TRUE)) expect_equal(fbetween(m, na.rm = FALSE), dapply(m, between)) expect_equal(fbetween(mNA, na.rm = FALSE), dapply(mNA, between)) expect_equal(fbetween(mNA), dapply(mNA, between, na.rm = TRUE)) expect_equal(fbetween(mtcars), dapply(mtcars, between, na.rm = TRUE)) expect_equal(fbetween(mtcars, na.rm = FALSE), dapply(mtcars, between)) expect_equal(fbetween(mtcNA, na.rm = FALSE), dapply(mtcNA, between)) expect_equal(fbetween(mtcNA), dapply(mtcNA, between, na.rm = TRUE)) expect_equal(fbetween(x, f), BY(x, f, between, na.rm = TRUE, use.g.names = FALSE)) expect_equal(fbetween(x, f, na.rm = FALSE), BY(x, f, between, use.g.names = FALSE)) expect_equal(fbetween(xNA, f, na.rm = FALSE), BY(xNA, f, between, use.g.names = FALSE)) expect_equal(fbetween(xNA, f), BY(xNA, f, between, na.rm = TRUE, use.g.names = FALSE)) expect_equal(rn(fbetween(m, g)), BY(m, g, between, na.rm = TRUE, use.g.names = FALSE)) expect_equal(rn(fbetween(m, g, na.rm = FALSE)), BY(m, g, between, use.g.names = FALSE)) expect_equal(rn(fbetween(mNA, g, na.rm = FALSE)), BY(mNA, g, between, use.g.names = FALSE)) expect_equal(rn(fbetween(mNA, g)), BY(mNA, g, between, na.rm = TRUE, use.g.names = FALSE)) expect_equal(rn(fbetween(mtcars, g)), BY(mtcars, g, between, na.rm = TRUE, use.g.names = FALSE)) expect_equal(rn(fbetween(mtcars, g, na.rm = FALSE)), BY(mtcars, g, between, use.g.names = FALSE)) expect_equal(rn(fbetween(mtcNA, g, na.rm = FALSE)), BY(mtcNA, g, between, use.g.names = FALSE)) expect_equal(rn(fbetween(mtcNA, g)), BY(mtcNA, g, between, na.rm = TRUE, use.g.names = FALSE)) }) test_that("fbetween performs like fbetween with weights all equal", { expect_equal(fbetween(NA), fbetween(NA, w = 0.99999999)) expect_equal(fbetween(NA, na.rm = FALSE), fbetween(NA, w = 2.946, na.rm = FALSE)) expect_equal(fbetween(1), fbetween(1, w = 3)) expect_equal(fbetween(1:3), fbetween(1:3, w = rep(0.999,3))) expect_equal(fbetween(-1:1), fbetween(-1:1, w = rep(4.2,3))) expect_equal(fbetween(1, na.rm = FALSE), fbetween(1, w = 5, na.rm = FALSE)) expect_equal(fbetween(1:3, na.rm = FALSE), fbetween(1:3, w = rep(1.44565, 3), na.rm = FALSE)) expect_equal(fbetween(-1:1, na.rm = FALSE), fbetween(-1:1, w = rep(1.44565, 3), na.rm = FALSE)) expect_equal(fbetween(x), fbetween(x, w = rep(1,100))) expect_equal(fbetween(x, na.rm = FALSE), fbetween(x, w = rep(1.44565, 100), na.rm = FALSE)) expect_equal(fbetween(xNA, na.rm = FALSE), fbetween(xNA, w = rep(4.676587, 100), na.rm = FALSE)) expect_equal(fbetween(xNA), fbetween(xNA, w = rep(4.676587, 100))) expect_equal(fbetween(m), fbetween(m, w = rep(6587.3454, 32))) expect_equal(fbetween(m, na.rm = FALSE), fbetween(m, w = rep(6587.3454, 32), na.rm = FALSE)) expect_equal(fbetween(mNA, na.rm = FALSE), fbetween(mNA, w = rep(6587.3454, 32), na.rm = FALSE)) expect_equal(fbetween(mNA), fbetween(mNA, w = rep(6587.3454, 32))) expect_equal(fbetween(mtcars), fbetween(mtcars, w = rep(6787.3454, 32))) expect_equal(fbetween(mtcars, na.rm = FALSE), fbetween(mtcars, w = rep(6787.3454, 32), na.rm = FALSE)) expect_equal(fbetween(mtcNA, na.rm = FALSE), fbetween(mtcNA, w = rep(6787.3454, 32), na.rm = FALSE)) expect_equal(fbetween(mtcNA), fbetween(mtcNA, w = rep(6787.3454, 32))) expect_equal(fbetween(x, f), fbetween(x, f, rep(546.78,100))) expect_equal(fbetween(x, f, na.rm = FALSE), fbetween(x, f, rep(5.88,100), na.rm = FALSE)) expect_equal(fbetween(xNA, f, na.rm = FALSE), fbetween(xNA, f, rep(52.7,100), na.rm = FALSE)) expect_equal(fbetween(xNA, f), fbetween(xNA, f, rep(5997456,100))) expect_equal(fbetween(m, g), fbetween(m, g, rep(546.78,32))) expect_equal(fbetween(m, g, na.rm = FALSE), fbetween(m, g, rep(0.0001,32), na.rm = FALSE)) expect_equal(fbetween(mNA, g, na.rm = FALSE), fbetween(mNA, g, rep(5.7,32), na.rm = FALSE)) expect_equal(fbetween(mNA, g), fbetween(mNA, g, rep(1.1,32))) expect_equal(fbetween(mtcars, g), fbetween(mtcars, g, rep(53,32))) expect_equal(fbetween(mtcars, g, na.rm = FALSE), fbetween(mtcars, g, rep(546.78,32), na.rm = FALSE)) expect_equal(fbetween(mtcNA, g, na.rm = FALSE), fbetween(mtcNA, g, rep(0.999999,32), na.rm = FALSE)) expect_equal(fbetween(mtcNA, g), fbetween(mtcNA, g, rep(999.9999,32))) }) test_that("fbetween with weights performs like wbetween (defined above)", { # complete weights expect_equal(fbetween(NA, w = 1), wbetween(NA, 1)) expect_equal(fbetween(NA, w = 1, na.rm = FALSE), wbetween(NA, 1)) expect_equal(fbetween(1, w = 1), wbetween(1, w = 1)) expect_equal(fbetween(1:3, w = 1:3), wbetween(1:3, 1:3)) expect_equal(fbetween(-1:1, w = 1:3), wbetween(-1:1, 1:3)) expect_equal(fbetween(1, w = 1, na.rm = FALSE), wbetween(1, 1)) expect_equal(fbetween(1:3, w = c(0.99,3454,1.111), na.rm = FALSE), wbetween(1:3, c(0.99,3454,1.111))) expect_equal(fbetween(-1:1, w = 1:3, na.rm = FALSE), wbetween(-1:1, 1:3)) expect_equal(fbetween(x, w = w), wbetween(x, w)) expect_equal(fbetween(x, w = w, na.rm = FALSE), wbetween(x, w)) expect_equal(fbetween(xNA, w = w, na.rm = FALSE), wbetween(xNA, w)) expect_equal(fbetween(xNA, w = w), wbetween(xNA, w, na.rm = TRUE)) expect_equal(qM(fbetween(mtcars, w = wdat)), fbetween(m, w = wdat)) expect_equal(fbetween(m, w = wdat), dapply(m, wbetween, wdat, na.rm = TRUE)) expect_equal(fbetween(m, w = wdat, na.rm = FALSE), dapply(m, wbetween, wdat)) expect_equal(fbetween(mNA, w = wdat, na.rm = FALSE), dapply(mNA, wbetween, wdat)) expect_equal(fbetween(mNA, w = wdat), dapply(mNA, wbetween, wdat, na.rm = TRUE)) expect_equal(fbetween(mtcars, w = wdat), dapply(mtcars, wbetween, wdat, na.rm = TRUE)) expect_equal(fbetween(mtcars, w = wdat, na.rm = FALSE), dapply(mtcars, wbetween, wdat)) expect_equal(fbetween(mtcNA, w = wdat, na.rm = FALSE), dapply(mtcNA, wbetween, wdat)) expect_equal(fbetween(mtcNA, w = wdat), dapply(mtcNA, wbetween, wdat, na.rm = TRUE)) expect_equal(fbetween(x, f, w), wBY(x, f, wbetween, w)) expect_equal(fbetween(x, f, w, na.rm = FALSE), wBY(x, f, wbetween, w)) expect_equal(fbetween(xNA, f, w, na.rm = FALSE), wBY(xNA, f, wbetween, w)) expect_equal(fbetween(xNA, f, w), wBY(xNA, f, wbetween, w, na.rm = TRUE)) expect_equal(fbetween(m, g, wdat), wBY(m, g, wbetween, wdat)) expect_equal(fbetween(m, g, wdat, na.rm = FALSE), wBY(m, g, wbetween, wdat)) expect_equal(fbetween(mNA, g, wdat, na.rm = FALSE), wBY(mNA, g, wbetween, wdat)) expect_equal(fbetween(mNA, g, wdat), wBY(mNA, g, wbetween, wdat, na.rm = TRUE)) expect_equal(fbetween(mtcars, g, wdat), wBY(mtcars, g, wbetween, wdat)) expect_equal(fbetween(mtcars, g, wdat, na.rm = FALSE), wBY(mtcars, g, wbetween, wdat)) expect_equal(fbetween(mtcNA, g, wdat, na.rm = FALSE), wBY(mtcNA, g, wbetween, wdat)) expect_equal(fbetween(mtcNA, g, wdat), wBY(mtcNA, g, wbetween, wdat, na.rm = TRUE)) # missing weights expect_equal(fbetween(NA, w = NA), wbetween(NA, NA)) expect_equal(fbetween(NA, w = NA, na.rm = FALSE), wbetween(NA, NA)) expect_equal(fbetween(1, w = NA), wbetween(1, w = NA)) expect_equal(fbetween(1:3, w = c(NA,1:2)), wbetween(1:3, c(NA,1:2), na.rm = TRUE)) expect_equal(fbetween(-1:1, w = c(NA,1:2)), wbetween(-1:1, c(NA,1:2), na.rm = TRUE)) expect_equal(fbetween(1, w = NA, na.rm = FALSE), wbetween(1, NA)) expect_equal(fbetween(1:3, w = c(NA,1:2), na.rm = FALSE), wbetween(1:3, c(NA,1:2))) expect_equal(fbetween(-1:1, w = c(NA,1:2), na.rm = FALSE), wbetween(-1:1, c(NA,1:2))) expect_equal(fbetween(x, w = wNA), wbetween(x, wNA, na.rm = TRUE)) expect_equal(fbetween(x, w = wNA, na.rm = FALSE), wbetween(x, wNA)) expect_equal(fbetween(xNA, w = wNA, na.rm = FALSE), wbetween(xNA, wNA)) expect_equal(fbetween(xNA, w = wNA), wbetween(xNA, wNA, na.rm = TRUE)) expect_equal(qM(fbetween(mtcars, w = wdatNA)), fbetween(m, w = wdatNA)) expect_equal(fbetween(m, w = wdatNA), dapply(m, wbetween, wdatNA, na.rm = TRUE)) expect_equal(fbetween(m, w = wdatNA, na.rm = FALSE), dapply(m, wbetween, wdatNA)) expect_equal(fbetween(mNA, w = wdatNA, na.rm = FALSE), dapply(mNA, wbetween, wdatNA)) expect_equal(fbetween(mNA, w = wdatNA), dapply(mNA, wbetween, wdatNA, na.rm = TRUE)) expect_equal(fbetween(mtcars, w = wdatNA), dapply(mtcars, wbetween, wdatNA, na.rm = TRUE)) expect_equal(fbetween(mtcars, w = wdatNA, na.rm = FALSE), dapply(mtcars, wbetween, wdatNA)) expect_equal(fbetween(mtcNA, w = wdatNA, na.rm = FALSE), dapply(mtcNA, wbetween, wdatNA)) expect_equal(fbetween(mtcNA, w = wdatNA), dapply(mtcNA, wbetween, wdatNA, na.rm = TRUE)) expect_equal(fbetween(x, f, wNA), wBY(x, f, wbetween, wNA, na.rm = TRUE)) expect_equal(fbetween(x, f, wNA, na.rm = FALSE), wBY(x, f, wbetween, wNA)) expect_equal(fbetween(xNA, f, wNA, na.rm = FALSE), wBY(xNA, f, wbetween, wNA)) expect_equal(fbetween(xNA, f, wNA), wBY(xNA, f, wbetween, wNA, na.rm = TRUE)) expect_equal(fbetween(m, g, wdatNA), wBY(m, g, wbetween, wdatNA, na.rm = TRUE)) expect_equal(fbetween(m, g, wdatNA, na.rm = FALSE), wBY(m, g, wbetween, wdatNA)) expect_equal(fbetween(mNA, g, wdatNA, na.rm = FALSE), wBY(mNA, g, wbetween, wdatNA)) expect_equal(fbetween(mNA, g, wdatNA), wBY(mNA, g, wbetween, wdatNA, na.rm = TRUE)) expect_equal(fbetween(mtcars, g, wdatNA), wBY(mtcars, g, wbetween, wdatNA, na.rm = TRUE)) expect_equal(fbetween(mtcars, g, wdatNA, na.rm = FALSE), wBY(mtcars, g, wbetween, wdatNA)) expect_equal(fbetween(mtcNA, g, wdatNA, na.rm = FALSE), wBY(mtcNA, g, wbetween, wdatNA)) expect_equal(fbetween(mtcNA, g, wdatNA), wBY(mtcNA, g, wbetween, wdatNA, na.rm = TRUE)) }) test_that("fbetween performs numerically stable", { expect_true(all_obj_equal(replicate(50, fbetween(1), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(NA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(NA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(x), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(x, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(xNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(xNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(m), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(m, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(mNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(mNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(mtcars), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(mtcars, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(mtcNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(mtcNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(x, f), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(x, f, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(xNA, f, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(xNA, f), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(m, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(m, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(mNA, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(mNA, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(mtcars, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(mtcars, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(mtcNA, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(mtcNA, g), simplify = FALSE))) }) test_that("fbetween with complete weights performs numerically stable", { expect_true(all_obj_equal(replicate(50, fbetween(1, w = 1), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(NA, w = 1), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(NA, w = 1, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(x, w = w), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(x, w = w, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(xNA, w = w, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(xNA, w = w), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(m, w = wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(m, w = wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(mNA, w = wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(mNA, w = wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(mtcars, w = wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(mtcars, w = wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(mtcNA, w = wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(mtcNA, w = wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(x, f, w), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(x, f, w, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(xNA, f, w, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(xNA, f, w), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(m, g, wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(m, g, wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(mNA, g, wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(mNA, g, wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(mtcars, g, wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(mtcars, g, wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(mtcNA, g, wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(mtcNA, g, wdat), simplify = FALSE))) }) test_that("fbetween with missing weights performs numerically stable", { expect_true(all_obj_equal(replicate(50, fbetween(1, w = NA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(NA, w = NA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(NA, w = NA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(x, w = wNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(x, w = wNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(xNA, w = wNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(xNA, w = wNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(m, w = wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(m, w = wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(mNA, w = wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(mNA, w = wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(mtcars, w = wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(mtcars, w = wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(mtcNA, w = wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(mtcNA, w = wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(x, f, wNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(x, f, wNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(xNA, f, wNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(xNA, f, wNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(m, g, wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(m, g, wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(mNA, g, wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(mNA, g, wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(mtcars, g, wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(mtcars, g, wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(mtcNA, g, wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(mtcNA, g, wdatNA), simplify = FALSE))) }) test_that("fbetween handles special values in the right way", { expect_equal(fbetween(NA), NA_real_) expect_equal(fbetween(NaN), NaN) expect_equal(fbetween(Inf), Inf) expect_equal(fbetween(c(Inf,Inf)), c(Inf,Inf)) expect_equal(fbetween(-Inf), -Inf) expect_equal(fbetween(c(-Inf,-Inf)), c(-Inf,-Inf)) expect_equal(fbetween(TRUE), 1) expect_equal(fbetween(FALSE), 0) expect_equal(fbetween(NA, na.rm = FALSE), NA_real_) expect_equal(fbetween(NaN, na.rm = FALSE), NaN) expect_equal(fbetween(Inf, na.rm = FALSE), Inf) expect_equal(fbetween(c(Inf,Inf), na.rm = FALSE), c(Inf,Inf)) expect_equal(fbetween(-Inf, na.rm = FALSE), -Inf) expect_equal(fbetween(c(-Inf,-Inf), na.rm = FALSE), c(-Inf,-Inf)) expect_equal(fbetween(TRUE, na.rm = FALSE), 1) expect_equal(fbetween(FALSE, na.rm = FALSE), 0) expect_equal(fbetween(c(1,NA)), c(1,NA_real_)) expect_equal(fbetween(c(1,NaN)), c(1,NaN)) expect_equal(fbetween(c(1,Inf)), c(Inf,Inf)) expect_equal(fbetween(c(1,-Inf)), c(-Inf,-Inf)) expect_equal(fbetween(c(1,Inf), na.rm = FALSE), c(Inf,Inf)) expect_equal(fbetween(c(1,-Inf), na.rm = FALSE), c(-Inf,-Inf)) expect_equal(fbetween(c(NA,-Inf), na.rm = FALSE), c(NA_real_,NA_real_)) expect_equal(fbetween(c(FALSE,FALSE), na.rm = FALSE), c(0,0)) expect_equal(fbetween(c(1,NA), na.rm = FALSE), c(NA_real_,NA_real_)) }) test_that("fbetween with weights handles special values in the right way", { expect_equal(fbetween(NA, w = 1), NA_real_) expect_equal(fbetween(NaN, w = 1), NaN) expect_equal(fbetween(Inf, w = 1), Inf) expect_equal(fbetween(c(Inf,Inf), w = 1:2), c(Inf,Inf)) expect_equal(fbetween(-Inf, w = 1), -Inf) expect_equal(fbetween(c(-Inf,-Inf), w = 1:2), c(-Inf,-Inf)) expect_equal(fbetween(TRUE, w = 1), 1) expect_equal(fbetween(FALSE, w = 1), 0) expect_equal(fbetween(NA, w = 1, na.rm = FALSE), NA_real_) expect_equal(fbetween(NaN, w = 1, na.rm = FALSE), NaN) expect_equal(fbetween(Inf, w = 1, na.rm = FALSE), Inf) expect_equal(fbetween(c(Inf,Inf), w = 1:2, na.rm = FALSE), c(Inf,Inf)) expect_equal(fbetween(-Inf, w = 1, na.rm = FALSE), -Inf) expect_equal(fbetween(c(-Inf,-Inf), w = 1:2, na.rm = FALSE), c(-Inf,-Inf)) expect_equal(fbetween(TRUE, w = 1, na.rm = FALSE), 1) expect_equal(fbetween(FALSE, w = 1, na.rm = FALSE), 0) expect_equal(fbetween(c(1,NA), w = 1:2), c(1,NA_real_)) expect_equal(fbetween(c(1,NaN), w = 1:2), c(1,NaN)) expect_equal(fbetween(c(1,Inf), w = 1:2), c(Inf,Inf)) expect_equal(fbetween(c(1,-Inf), w = 1:2), c(-Inf,-Inf)) expect_equal(fbetween(c(1,Inf), w = 1:2, na.rm = FALSE), c(Inf,Inf)) expect_equal(fbetween(c(1,-Inf), w = 1:2, na.rm = FALSE), c(-Inf,-Inf)) expect_equal(fbetween(c(NA,-Inf), w = 1:2, na.rm = FALSE), c(NA_real_,NA_real_)) expect_equal(fbetween(c(FALSE,FALSE), w = 1:2, na.rm = FALSE), c(0,0)) expect_equal(fbetween(c(1,NA), w = 1:2, na.rm = FALSE), c(NA_real_,NA_real_)) expect_equal(fbetween(1:3, w = c(1,Inf,3)), c(NA_real_,NA_real_,NA_real_)) expect_equal(fbetween(1:3, w = c(1,-Inf,3)), c(NA_real_,NA_real_,NA_real_)) expect_equal(fbetween(1:3, w = c(1,Inf,3), na.rm = FALSE), c(NA_real_,NA_real_,NA_real_)) expect_equal(fbetween(1:3, w = c(1,-Inf,3), na.rm = FALSE), c(NA_real_,NA_real_,NA_real_)) expect_equal(fbetween(NA, w = NA), NA_real_) expect_equal(fbetween(NaN, w = NA), NaN) expect_equal(fbetween(Inf, w = NA), NA_real_) expect_equal(fbetween(c(Inf,Inf), w = c(NA,2)), c(Inf,Inf)) expect_equal(fbetween(-Inf, w = NA), NA_real_) expect_equal(fbetween(c(-Inf,-Inf), w = c(NA,2)), c(-Inf,-Inf)) expect_equal(fbetween(TRUE, w = NA), NA_real_) expect_equal(fbetween(FALSE, w = NA), NA_real_) expect_equal(fbetween(NA, w = NA, na.rm = FALSE), NA_real_) expect_equal(fbetween(NaN, w = NA, na.rm = FALSE), NA_real_) expect_equal(fbetween(Inf, w = NA, na.rm = FALSE), NA_real_) expect_equal(fbetween(c(Inf,Inf), w = c(NA,2), na.rm = FALSE), c(NA_real_,NA_real_)) expect_equal(fbetween(-Inf, w = NA, na.rm = FALSE), NA_real_) expect_equal(fbetween(c(-Inf,-Inf), w = c(NA,2), na.rm = FALSE), c(NA_real_,NA_real_)) expect_equal(fbetween(TRUE, w = NA, na.rm = FALSE), NA_real_) expect_equal(fbetween(FALSE, w = NA, na.rm = FALSE), NA_real_) expect_equal(fbetween(c(1,NA), w = c(NA,2)), c(NA_real_,NA_real_)) expect_equal(fbetween(c(1,NaN), w = c(NA,2)), c(NA_real_,NA_real_)) expect_equal(fbetween(c(1,Inf), w = c(NA,2)), c(Inf,Inf)) expect_equal(fbetween(c(1,-Inf), w = c(NA,2)), c(-Inf,-Inf)) expect_equal(fbetween(c(1,Inf), w = c(NA,2), na.rm = FALSE), c(NA_real_,NA_real_)) expect_equal(fbetween(c(1,-Inf), w = c(NA,2), na.rm = FALSE), c(NA_real_,NA_real_)) expect_equal(fbetween(c(NA,-Inf), w = c(NA,2), na.rm = FALSE), c(NA_real_,NA_real_)) expect_equal(fbetween(c(FALSE,FALSE), w = c(NA,2), na.rm = FALSE), c(NA_real_,NA_real_)) expect_equal(fbetween(c(1,NA), w = c(NA,2), na.rm = FALSE), c(NA_real_,NA_real_)) expect_equal(fbetween(1:3, w = c(NA,Inf,3)), c(NA_real_,NA_real_,NA_real_)) expect_equal(fbetween(1:3, w = c(NA,-Inf,3)), c(NA_real_,NA_real_,NA_real_)) expect_equal(fbetween(1:3, w = c(NA,Inf,3), na.rm = FALSE), c(NA_real_,NA_real_,NA_real_)) expect_equal(fbetween(1:3, w = c(NA,-Inf,3), na.rm = FALSE), c(NA_real_,NA_real_,NA_real_)) }) test_that("fbetween produces errors for wrong input", { expect_error(fbetween("a")) expect_error(fbetween(NA_character_)) expect_error(fbetween(mNAc)) expect_error(fbetween(mNAc, f)) expect_error(fbetween(1:2,1:3)) expect_error(fbetween(m,1:31)) expect_error(fbetween(mtcars,1:31)) expect_error(fbetween(mtcars, w = 1:31)) expect_error(fbetween("a", w = 1)) expect_error(fbetween(1:2, w = 1:3)) expect_error(fbetween(NA_character_, w = 1)) expect_error(fbetween(mNAc, w = wdat)) expect_error(fbetween(mNAc, f, wdat)) expect_error(fbetween(mNA, w = 1:33)) expect_error(fbetween(1:2,1:2, 1:3)) expect_error(fbetween(m,1:32,1:20)) expect_error(fbetween(mtcars,1:32,1:10)) expect_error(fbetween(1:2, w = c("a","b"))) expect_error(fbetween(wlddev)) expect_error(fbetween(wlddev, w = wlddev$year)) expect_error(fbetween(wlddev, wlddev$iso3c)) expect_error(fbetween(wlddev, wlddev$iso3c, wlddev$year)) }) # B test_that("B produces errors for wrong input", { expect_error(B("a")) expect_error(B(NA_character_)) expect_error(B(mNAc)) expect_error(B(mNAc, f)) expect_error(B(1:2,1:3)) expect_error(B(m,1:31)) expect_error(B(mtcars,1:31)) expect_error(B(mtcars, w = 1:31)) expect_error(B("a", w = 1)) expect_error(B(1:2, w = c("a","b"))) expect_error(B(1:2, w = 1:3)) expect_error(B(NA_character_, w = 1)) expect_error(B(mNAc, w = wdat)) expect_error(B(mNAc, f, wdat)) expect_error(B(mNA, w = 1:33)) expect_error(B(mtcNA, w = 1:33)) expect_error(B(1:2,1:2, 1:3)) expect_error(B(m,1:32,1:20)) expect_error(B(mtcars,1:32,1:10)) expect_error(B(1:2, 1:3, 1:2)) expect_error(B(m,1:31,1:32)) expect_error(B(mtcars,1:33,1:32)) }) test_that("B.data.frame method is foolproof", { expect_visible(B(wlddev)) expect_visible(B(wlddev, w = wlddev$year)) expect_visible(B(wlddev, w = ~year)) expect_visible(B(wlddev, wlddev$iso3c)) expect_visible(B(wlddev, ~iso3c)) expect_visible(B(wlddev, ~iso3c + region)) expect_visible(B(wlddev, wlddev$iso3c, wlddev$year)) expect_visible(B(wlddev, ~iso3c, ~year)) expect_visible(B(wlddev, cols = 9:12)) expect_visible(B(wlddev, w = wlddev$year, cols = 9:12)) expect_visible(B(wlddev, w = ~year, cols = 9:12)) expect_visible(B(wlddev, wlddev$iso3c, cols = 9:12)) expect_visible(B(wlddev, ~iso3c, cols = 9:12)) expect_visible(B(wlddev, wlddev$iso3c, wlddev$year, cols = 9:12)) expect_visible(B(wlddev, ~iso3c, ~year, cols = 9:12)) expect_visible(B(wlddev, cols = c("PCGDP","LIFEEX"))) expect_visible(B(wlddev, w = wlddev$year, cols = c("PCGDP","LIFEEX"))) expect_visible(B(wlddev, w = ~year, cols = c("PCGDP","LIFEEX"))) expect_visible(B(wlddev, wlddev$iso3c, cols = c("PCGDP","LIFEEX"))) expect_visible(B(wlddev, ~iso3c, cols = c("PCGDP","LIFEEX"))) expect_visible(B(wlddev, wlddev$iso3c, wlddev$year, cols = c("PCGDP","LIFEEX"))) expect_visible(B(wlddev, ~iso3c, ~year, cols = c("PCGDP","LIFEEX"))) expect_error(B(wlddev, cols = NULL)) expect_error(B(wlddev, w = wlddev$year, cols = NULL)) expect_error(B(wlddev, w = ~year, cols = NULL)) expect_error(B(wlddev, wlddev$iso3c, cols = NULL)) expect_error(B(wlddev, ~iso3c, cols = NULL)) expect_error(B(wlddev, wlddev$iso3c, wlddev$year, cols = NULL)) expect_error(B(wlddev, ~iso3c, ~year, cols = NULL)) expect_error(B(wlddev, cols = 9:14)) expect_error(B(wlddev, w = wlddev$year, cols = 9:14)) expect_error(B(wlddev, w = ~year, cols = 9:14)) expect_error(B(wlddev, wlddev$iso3c, cols = 9:14)) expect_error(B(wlddev, ~iso3c, cols = 9:14)) expect_error(B(wlddev, wlddev$iso3c, wlddev$year, cols = 9:14)) expect_error(B(wlddev, ~iso3c, ~year, cols = 9:14)) expect_error(B(wlddev, cols = c("PCGDP","LIFEEX","bla"))) expect_error(B(wlddev, w = wlddev$year, cols = c("PCGDP","LIFEEX","bla"))) expect_error(B(wlddev, w = ~year, cols = c("PCGDP","LIFEEX","bla"))) expect_error(B(wlddev, wlddev$iso3c, cols = c("PCGDP","LIFEEX","bla"))) expect_error(B(wlddev, ~iso3c, cols = c("PCGDP","LIFEEX","bla"))) expect_error(B(wlddev, wlddev$iso3c, wlddev$year, cols = c("PCGDP","LIFEEX","bla"))) expect_error(B(wlddev, ~iso3c, ~year, cols = c("PCGDP","LIFEEX","bla"))) expect_error(B(wlddev, w = mtcars)) expect_error(B(wlddev, w = 4)) expect_error(B(wlddev, w = "year")) expect_error(B(wlddev, w = ~year2)) suppressWarnings(expect_error(B(wlddev, w = ~year + region))) expect_error(B(wlddev, mtcars)) expect_error(B(wlddev, 2)) expect_error(B(wlddev, "iso3c")) expect_error(B(wlddev, ~iso3c2)) expect_error(B(wlddev, ~iso3c + bla)) expect_error(B(wlddev, mtcars$mpg, mtcars$cyl)) expect_error(B(wlddev, 2, 4)) expect_error(B(wlddev, ~iso3c2, ~year2)) expect_error(B(wlddev, cols = ~bla)) expect_error(B(wlddev, w = ~bla, cols = 9:12)) expect_error(B(wlddev, w = 4, cols = 9:12)) expect_error(B(wlddev, w = "year", cols = 9:12)) expect_error(B(wlddev, w = ~yewar, cols = 9:12)) expect_error(B(wlddev, mtcars$mpg, cols = 9:12)) expect_error(B(wlddev, ~iso3c + ss, cols = 9:12)) expect_error(B(wlddev, 2, cols = 9:12)) expect_error(B(wlddev, "iso3c", cols = 9:12)) expect_error(B(wlddev, wlddev$iso3c, ~year + bla, cols = 9:12)) expect_error(B(wlddev, ~iso3c3, ~year, cols = 9:12)) expect_error(B(wlddev, cols = c("PC3GDP","LIFEEX"))) }) # fwithin test_that("fwithin performs like within", { expect_equal(fwithin(NA), as.double(within(NA))) expect_equal(fwithin(NA, na.rm = FALSE), as.double(within(NA))) expect_equal(fwithin(1), within(1, na.rm = TRUE)) expect_equal(fwithin(1:3), within(1:3, na.rm = TRUE)) expect_equal(fwithin(-1:1), within(-1:1, na.rm = TRUE)) expect_equal(fwithin(1, na.rm = FALSE), within(1)) expect_equal(fwithin(1:3, na.rm = FALSE), within(1:3)) expect_equal(fwithin(-1:1, na.rm = FALSE), within(-1:1)) expect_equal(fwithin(x), within(x, na.rm = TRUE)) expect_equal(fwithin(x, na.rm = FALSE), within(x)) expect_equal(fwithin(xNA, na.rm = FALSE), within(xNA)) expect_equal(fwithin(xNA), within(xNA, na.rm = TRUE)) expect_equal(qM(fwithin(mtcars)), fwithin(m)) expect_equal(fwithin(m), dapply(m, within, na.rm = TRUE)) expect_equal(fwithin(m, na.rm = FALSE), dapply(m, within)) expect_equal(fwithin(mNA, na.rm = FALSE), dapply(mNA, within)) expect_equal(fwithin(mNA), dapply(mNA, within, na.rm = TRUE)) expect_equal(fwithin(mtcars), dapply(mtcars, within, na.rm = TRUE)) expect_equal(fwithin(mtcars, na.rm = FALSE), dapply(mtcars, within)) expect_equal(fwithin(mtcNA, na.rm = FALSE), dapply(mtcNA, within)) expect_equal(fwithin(mtcNA), dapply(mtcNA, within, na.rm = TRUE)) expect_equal(fwithin(x, f), BY(x, f, within, na.rm = TRUE, use.g.names = FALSE)) expect_equal(fwithin(x, f, na.rm = FALSE), BY(x, f, within, use.g.names = FALSE)) expect_equal(fwithin(xNA, f, na.rm = FALSE), BY(xNA, f, within, use.g.names = FALSE)) expect_equal(fwithin(xNA, f), BY(xNA, f, within, na.rm = TRUE, use.g.names = FALSE)) expect_equal(rn(fwithin(m, g)), BY(m, g, within, na.rm = TRUE, use.g.names = FALSE)) expect_equal(rn(fwithin(m, g, na.rm = FALSE)), BY(m, g, within, use.g.names = FALSE)) expect_equal(rn(fwithin(mNA, g, na.rm = FALSE)), BY(mNA, g, within, use.g.names = FALSE)) expect_equal(rn(fwithin(mNA, g)), BY(mNA, g, within, na.rm = TRUE, use.g.names = FALSE)) expect_equal(rn(fwithin(mtcars, g)), BY(mtcars, g, within, na.rm = TRUE, use.g.names = FALSE)) expect_equal(rn(fwithin(mtcars, g, na.rm = FALSE)), BY(mtcars, g, within, use.g.names = FALSE)) expect_equal(rn(fwithin(mtcNA, g, na.rm = FALSE)), BY(mtcNA, g, within, use.g.names = FALSE)) expect_equal(rn(fwithin(mtcNA, g)), BY(mtcNA, g, within, na.rm = TRUE, use.g.names = FALSE)) }) test_that("fwithin with custom mean performs like within (defined above)", { expect_equal(fwithin(x, mean = 4.8456), within(x, na.rm = TRUE, mean = 4.8456)) expect_equal(fwithin(x, na.rm = FALSE, mean = 4.8456), within(x, mean = 4.8456)) expect_equal(fwithin(xNA, na.rm = FALSE, mean = 4.8456), within(xNA, mean = 4.8456)) expect_equal(fwithin(xNA, mean = 4.8456), within(xNA, na.rm = TRUE, mean = 4.8456)) expect_equal(qM(fwithin(mtcars, mean = 4.8456)), fwithin(m, mean = 4.8456)) expect_equal(fwithin(m, mean = 4.8456), dapply(m, within, na.rm = TRUE, mean = 4.8456)) expect_equal(fwithin(m, na.rm = FALSE, mean = 4.8456), dapply(m, within, mean = 4.8456)) expect_equal(fwithin(mNA, na.rm = FALSE, mean = 4.8456), dapply(mNA, within, mean = 4.8456)) expect_equal(fwithin(mNA, mean = 4.8456), dapply(mNA, within, na.rm = TRUE, mean = 4.8456)) expect_equal(fwithin(x, f, mean = 4.8456), BY(x, f, within, na.rm = TRUE, use.g.names = FALSE, mean = 4.8456)) expect_equal(fwithin(x, f, na.rm = FALSE, mean = 4.8456), BY(x, f, within, use.g.names = FALSE, mean = 4.8456)) expect_equal(fwithin(xNA, f, na.rm = FALSE, mean = 4.8456), BY(xNA, f, within, use.g.names = FALSE, mean = 4.8456)) expect_equal(fwithin(xNA, f, mean = 4.8456), BY(xNA, f, within, na.rm = TRUE, use.g.names = FALSE, mean = 4.8456)) expect_equal(rn(fwithin(m, g, mean = 4.8456)), BY(m, g, within, na.rm = TRUE, use.g.names = FALSE, mean = 4.8456)) expect_equal(rn(fwithin(m, g, na.rm = FALSE, mean = 4.8456)), BY(m, g, within, use.g.names = FALSE, mean = 4.8456)) expect_equal(rn(fwithin(mNA, g, na.rm = FALSE, mean = 4.8456)), BY(mNA, g, within, use.g.names = FALSE, mean = 4.8456)) expect_equal(rn(fwithin(mNA, g, mean = 4.8456)), BY(mNA, g, within, na.rm = TRUE, use.g.names = FALSE, mean = 4.8456)) expect_equal(rn(fwithin(mtcars, g, mean = 4.8456)), BY(mtcars, g, within, na.rm = TRUE, use.g.names = FALSE, mean = 4.8456)) expect_equal(rn(fwithin(mtcars, g, na.rm = FALSE, mean = 4.8456)), BY(mtcars, g, within, use.g.names = FALSE, mean = 4.8456)) expect_equal(rn(fwithin(mtcNA, g, na.rm = FALSE, mean = 4.8456)), BY(mtcNA, g, within, use.g.names = FALSE, mean = 4.8456)) expect_equal(rn(fwithin(mtcNA, g, mean = 4.8456)), BY(mtcNA, g, within, na.rm = TRUE, use.g.names = FALSE, mean = 4.8456)) }) test_that("Centering on overall mean performs as indended", { expect_equal(fwithin(x, f, mean = "overall.mean"), BY(x, f, within, na.rm = TRUE, use.g.names = FALSE) + ave(x)) expect_equal(fwithin(x, f, na.rm = FALSE, mean = "overall.mean"), BY(x, f, within, use.g.names = FALSE) + ave(x)) # expect_equal(fwithin(xNA, f, na.rm = FALSE, mean = "overall.mean"), BY(xNA, f, within, use.g.names = FALSE) + B(xNA)) # Not the same !! expect_equal(fwithin(xNA, f, mean = "overall.mean"), BY(xNA, f, within, na.rm = TRUE, use.g.names = FALSE) + B(xNA)) expect_equal(rn(fwithin(m, g, mean = "overall.mean")), BY(m, g, within, na.rm = TRUE, use.g.names = FALSE) + B(m)) expect_equal(rn(fwithin(m, g, na.rm = FALSE, mean = "overall.mean")), BY(m, g, within, use.g.names = FALSE) + B(m)) # expect_equal(fwithin(mNA, g, na.rm = FALSE, mean = "overall.mean"), BY(mNA, g, within, use.g.names = FALSE) + B(mNA)) expect_equal(rn(fwithin(mNA, g, mean = "overall.mean")), BY(mNA, g, within, na.rm = TRUE, use.g.names = FALSE) + B(mNA)) expect_equal(rn(fwithin(mtcars, g, mean = "overall.mean")), BY(mtcars, g, within, na.rm = TRUE, use.g.names = FALSE) + B(mtcars)) expect_equal(rn(fwithin(mtcars, g, na.rm = FALSE, mean = "overall.mean")), BY(mtcars, g, within, use.g.names = FALSE) + B(mtcars)) # expect_equal(fwithin(mtcNA, g, na.rm = FALSE, mean = "overall.mean"), BY(mtcNA, g, within, use.g.names = FALSE) + B(mtcNA)) expect_equal(rn(fwithin(mtcNA, g, mean = "overall.mean")), BY(mtcNA, g, within, na.rm = TRUE, use.g.names = FALSE)+ B(mtcNA)) }) test_that("fwithin performs like fwithin with weights all equal", { expect_equal(fwithin(NA), fwithin(NA, w = 0.99999999)) expect_equal(fwithin(NA, na.rm = FALSE), fwithin(NA, w = 2.946, na.rm = FALSE)) expect_equal(fwithin(1), fwithin(1, w = 3)) expect_equal(fwithin(1:3), fwithin(1:3, w = rep(0.999,3))) expect_equal(fwithin(-1:1), fwithin(-1:1, w = rep(4.2,3))) expect_equal(fwithin(1, na.rm = FALSE), fwithin(1, w = 5, na.rm = FALSE)) expect_equal(fwithin(1:3, na.rm = FALSE), fwithin(1:3, w = rep(1.44565, 3), na.rm = FALSE)) expect_equal(fwithin(-1:1, na.rm = FALSE), fwithin(-1:1, w = rep(1.44565, 3), na.rm = FALSE)) expect_equal(fwithin(x), fwithin(x, w = rep(1,100))) expect_equal(fwithin(x, na.rm = FALSE), fwithin(x, w = rep(1.44565, 100), na.rm = FALSE)) expect_equal(fwithin(xNA, na.rm = FALSE), fwithin(xNA, w = rep(4.676587, 100), na.rm = FALSE)) expect_equal(fwithin(xNA), fwithin(xNA, w = rep(4.676587, 100))) expect_equal(fwithin(m), fwithin(m, w = rep(6587.3454, 32))) expect_equal(fwithin(m, na.rm = FALSE), fwithin(m, w = rep(6587.3454, 32), na.rm = FALSE)) expect_equal(fwithin(mNA, na.rm = FALSE), fwithin(mNA, w = rep(6587.3454, 32), na.rm = FALSE)) expect_equal(fwithin(mNA), fwithin(mNA, w = rep(6587.3454, 32))) expect_equal(fwithin(mtcars), fwithin(mtcars, w = rep(6787.3454, 32))) expect_equal(fwithin(mtcars, na.rm = FALSE), fwithin(mtcars, w = rep(6787.3454, 32), na.rm = FALSE)) expect_equal(fwithin(mtcNA, na.rm = FALSE), fwithin(mtcNA, w = rep(6787.3454, 32), na.rm = FALSE)) expect_equal(fwithin(mtcNA), fwithin(mtcNA, w = rep(6787.3454, 32))) expect_equal(fwithin(x, f), fwithin(x, f, rep(546.78,100))) expect_equal(fwithin(x, f, na.rm = FALSE), fwithin(x, f, rep(5.88,100), na.rm = FALSE)) expect_equal(fwithin(xNA, f, na.rm = FALSE), fwithin(xNA, f, rep(52.7,100), na.rm = FALSE)) expect_equal(fwithin(xNA, f), fwithin(xNA, f, rep(5997456,100))) expect_equal(fwithin(m, g), fwithin(m, g, rep(546.78,32))) expect_equal(fwithin(m, g, na.rm = FALSE), fwithin(m, g, rep(0.0001,32), na.rm = FALSE)) expect_equal(fwithin(mNA, g, na.rm = FALSE), fwithin(mNA, g, rep(5.7,32), na.rm = FALSE)) expect_equal(fwithin(mNA, g), fwithin(mNA, g, rep(1.1,32))) expect_equal(fwithin(mtcars, g), fwithin(mtcars, g, rep(53,32))) expect_equal(fwithin(mtcars, g, na.rm = FALSE), fwithin(mtcars, g, rep(546.78,32), na.rm = FALSE)) expect_equal(fwithin(mtcNA, g, na.rm = FALSE), fwithin(mtcNA, g, rep(0.999999,32), na.rm = FALSE)) expect_equal(fwithin(mtcNA, g), fwithin(mtcNA, g, rep(999.9999,32))) }) test_that("fwithin with weights performs like wwithin (defined above)", { # complete weights expect_equal(fwithin(NA, w = 1), wwithin(NA, 1)) expect_equal(fwithin(NA, w = 1, na.rm = FALSE), wwithin(NA, 1)) expect_equal(fwithin(1, w = 1), wwithin(1, w = 1)) expect_equal(fwithin(1:3, w = 1:3), wwithin(1:3, 1:3)) expect_equal(fwithin(-1:1, w = 1:3), wwithin(-1:1, 1:3)) expect_equal(fwithin(1, w = 1, na.rm = FALSE), wwithin(1, 1)) expect_equal(fwithin(1:3, w = c(0.99,3454,1.111), na.rm = FALSE), wwithin(1:3, c(0.99,3454,1.111))) expect_equal(fwithin(-1:1, w = 1:3, na.rm = FALSE), wwithin(-1:1, 1:3)) expect_equal(fwithin(x, w = w), wwithin(x, w)) expect_equal(fwithin(x, w = w, na.rm = FALSE), wwithin(x, w)) expect_equal(fwithin(xNA, w = w, na.rm = FALSE), wwithin(xNA, w)) expect_equal(fwithin(xNA, w = w), wwithin(xNA, w, na.rm = TRUE)) expect_equal(qM(fwithin(mtcars, w = wdat)), fwithin(m, w = wdat)) expect_equal(fwithin(m, w = wdat), dapply(m, wwithin, wdat, na.rm = TRUE)) expect_equal(fwithin(m, w = wdat, na.rm = FALSE), dapply(m, wwithin, wdat)) expect_equal(fwithin(mNA, w = wdat, na.rm = FALSE), dapply(mNA, wwithin, wdat)) expect_equal(fwithin(mNA, w = wdat), dapply(mNA, wwithin, wdat, na.rm = TRUE)) expect_equal(fwithin(mtcars, w = wdat), dapply(mtcars, wwithin, wdat, na.rm = TRUE)) expect_equal(fwithin(mtcars, w = wdat, na.rm = FALSE), dapply(mtcars, wwithin, wdat)) expect_equal(fwithin(mtcNA, w = wdat, na.rm = FALSE), dapply(mtcNA, wwithin, wdat)) expect_equal(fwithin(mtcNA, w = wdat), dapply(mtcNA, wwithin, wdat, na.rm = TRUE)) expect_equal(fwithin(x, f, w), wBY(x, f, wwithin, w)) expect_equal(fwithin(x, f, w, na.rm = FALSE), wBY(x, f, wwithin, w)) expect_equal(fwithin(xNA, f, w, na.rm = FALSE), wBY(xNA, f, wwithin, w)) expect_equal(fwithin(xNA, f, w), wBY(xNA, f, wwithin, w, na.rm = TRUE)) expect_equal(fwithin(m, g, wdat), wBY(m, g, wwithin, wdat)) expect_equal(fwithin(m, g, wdat, na.rm = FALSE), wBY(m, g, wwithin, wdat)) expect_equal(fwithin(mNA, g, wdat, na.rm = FALSE), wBY(mNA, g, wwithin, wdat)) expect_equal(fwithin(mNA, g, wdat), wBY(mNA, g, wwithin, wdat, na.rm = TRUE)) expect_equal(fwithin(mtcars, g, wdat), wBY(mtcars, g, wwithin, wdat)) expect_equal(fwithin(mtcars, g, wdat, na.rm = FALSE), wBY(mtcars, g, wwithin, wdat)) expect_equal(fwithin(mtcNA, g, wdat, na.rm = FALSE), wBY(mtcNA, g, wwithin, wdat)) expect_equal(fwithin(mtcNA, g, wdat), wBY(mtcNA, g, wwithin, wdat, na.rm = TRUE)) # missing weights expect_equal(fwithin(NA, w = NA), wwithin(NA, NA)) expect_equal(fwithin(NA, w = NA, na.rm = FALSE), wwithin(NA, NA)) expect_equal(fwithin(1, w = NA), wwithin(1, w = NA)) expect_equal(fwithin(1:3, w = c(NA,1:2)), wwithin(1:3, c(NA,1:2), na.rm = TRUE)) expect_equal(fwithin(-1:1, w = c(NA,1:2)), wwithin(-1:1, c(NA,1:2), na.rm = TRUE)) expect_equal(fwithin(1, w = NA, na.rm = FALSE), wwithin(1, NA)) expect_equal(fwithin(1:3, w = c(NA,1:2), na.rm = FALSE), wwithin(1:3, c(NA,1:2))) expect_equal(fwithin(-1:1, w = c(NA,1:2), na.rm = FALSE), wwithin(-1:1, c(NA,1:2))) expect_equal(fwithin(x, w = wNA), wwithin(x, wNA, na.rm = TRUE)) expect_equal(fwithin(x, w = wNA, na.rm = FALSE), wwithin(x, wNA)) expect_equal(fwithin(xNA, w = wNA, na.rm = FALSE), wwithin(xNA, wNA)) expect_equal(fwithin(xNA, w = wNA), wwithin(xNA, wNA, na.rm = TRUE)) expect_equal(qM(fwithin(mtcars, w = wdatNA)), fwithin(m, w = wdatNA)) expect_equal(fwithin(m, w = wdatNA), dapply(m, wwithin, wdatNA, na.rm = TRUE)) expect_equal(fwithin(m, w = wdatNA, na.rm = FALSE), dapply(m, wwithin, wdatNA)) expect_equal(fwithin(mNA, w = wdatNA, na.rm = FALSE), dapply(mNA, wwithin, wdatNA)) expect_equal(fwithin(mNA, w = wdatNA), dapply(mNA, wwithin, wdatNA, na.rm = TRUE)) expect_equal(fwithin(mtcars, w = wdatNA), dapply(mtcars, wwithin, wdatNA, na.rm = TRUE)) expect_equal(fwithin(mtcars, w = wdatNA, na.rm = FALSE), dapply(mtcars, wwithin, wdatNA)) expect_equal(fwithin(mtcNA, w = wdatNA, na.rm = FALSE), dapply(mtcNA, wwithin, wdatNA)) expect_equal(fwithin(mtcNA, w = wdatNA), dapply(mtcNA, wwithin, wdatNA, na.rm = TRUE)) expect_equal(fwithin(x, f, wNA), wBY(x, f, wwithin, wNA, na.rm = TRUE)) expect_equal(fwithin(x, f, wNA, na.rm = FALSE), wBY(x, f, wwithin, wNA)) expect_equal(fwithin(xNA, f, wNA, na.rm = FALSE), wBY(xNA, f, wwithin, wNA)) expect_equal(fwithin(xNA, f, wNA), wBY(xNA, f, wwithin, wNA, na.rm = TRUE)) expect_equal(fwithin(m, g, wdatNA), wBY(m, g, wwithin, wdatNA, na.rm = TRUE)) expect_equal(fwithin(m, g, wdatNA, na.rm = FALSE), wBY(m, g, wwithin, wdatNA)) expect_equal(fwithin(mNA, g, wdatNA, na.rm = FALSE), wBY(mNA, g, wwithin, wdatNA)) expect_equal(fwithin(mNA, g, wdatNA), wBY(mNA, g, wwithin, wdatNA, na.rm = TRUE)) expect_equal(fwithin(mtcars, g, wdatNA), wBY(mtcars, g, wwithin, wdatNA, na.rm = TRUE)) expect_equal(fwithin(mtcars, g, wdatNA, na.rm = FALSE), wBY(mtcars, g, wwithin, wdatNA)) expect_equal(fwithin(mtcNA, g, wdatNA, na.rm = FALSE), wBY(mtcNA, g, wwithin, wdatNA)) expect_equal(fwithin(mtcNA, g, wdatNA), wBY(mtcNA, g, wwithin, wdatNA, na.rm = TRUE)) }) test_that("fwithin with custom mean and weights performs like wwithin (defined above)", { # complete weights expect_equal(fwithin(x, w = w, mean = 4.8456), wwithin(x, w, mean = 4.8456)) expect_equal(fwithin(x, w = w, na.rm = FALSE, mean = 4.8456), wwithin(x, w, mean = 4.8456)) expect_equal(fwithin(xNA, w = w, na.rm = FALSE, mean = 4.8456), wwithin(xNA, w, mean = 4.8456)) expect_equal(fwithin(xNA, w = w, mean = 4.8456), wwithin(xNA, w, na.rm = TRUE, mean = 4.8456)) expect_equal(qM(fwithin(mtcars, w = wdat, mean = 4.8456)), fwithin(m, w = wdat, mean = 4.8456)) expect_equal(fwithin(m, w = wdat, mean = 4.8456), dapply(m, wwithin, wdat, na.rm = TRUE, mean = 4.8456)) expect_equal(fwithin(m, w = wdat, na.rm = FALSE, mean = 4.8456), dapply(m, wwithin, wdat, mean = 4.8456)) expect_equal(fwithin(mNA, w = wdat, na.rm = FALSE, mean = 4.8456), dapply(mNA, wwithin, wdat, mean = 4.8456)) expect_equal(fwithin(mNA, w = wdat, mean = 4.8456), dapply(mNA, wwithin, wdat, na.rm = TRUE, mean = 4.8456)) expect_equal(fwithin(mtcars, w = wdat, mean = 4.8456), dapply(mtcars, wwithin, wdat, na.rm = TRUE, mean = 4.8456)) expect_equal(fwithin(mtcars, w = wdat, na.rm = FALSE, mean = 4.8456), dapply(mtcars, wwithin, wdat, mean = 4.8456)) expect_equal(fwithin(mtcNA, w = wdat, na.rm = FALSE, mean = 4.8456), dapply(mtcNA, wwithin, wdat, mean = 4.8456)) expect_equal(fwithin(mtcNA, w = wdat, mean = 4.8456), dapply(mtcNA, wwithin, wdat, na.rm = TRUE, mean = 4.8456)) expect_equal(fwithin(x, f, w, mean = 4.8456), wBY(x, f, wwithin, w, mean = 4.8456)) expect_equal(fwithin(x, f, w, na.rm = FALSE, mean = 4.8456), wBY(x, f, wwithin, w, mean = 4.8456)) expect_equal(fwithin(xNA, f, w, na.rm = FALSE, mean = 4.8456), wBY(xNA, f, wwithin, w, mean = 4.8456)) expect_equal(fwithin(xNA, f, w, mean = 4.8456), wBY(xNA, f, wwithin, w, na.rm = TRUE, mean = 4.8456)) expect_equal(fwithin(m, g, wdat, mean = 4.8456), wBY(m, g, wwithin, wdat, mean = 4.8456)) expect_equal(fwithin(m, g, wdat, na.rm = FALSE, mean = 4.8456), wBY(m, g, wwithin, wdat, mean = 4.8456)) expect_equal(fwithin(mNA, g, wdat, na.rm = FALSE, mean = 4.8456), wBY(mNA, g, wwithin, wdat, mean = 4.8456)) expect_equal(fwithin(mNA, g, wdat, mean = 4.8456), wBY(mNA, g, wwithin, wdat, na.rm = TRUE, mean = 4.8456)) expect_equal(fwithin(mtcars, g, wdat, mean = 4.8456), wBY(mtcars, g, wwithin, wdat, mean = 4.8456)) expect_equal(fwithin(mtcars, g, wdat, na.rm = FALSE, mean = 4.8456), wBY(mtcars, g, wwithin, wdat, mean = 4.8456)) expect_equal(fwithin(mtcNA, g, wdat, na.rm = FALSE, mean = 4.8456), wBY(mtcNA, g, wwithin, wdat, mean = 4.8456)) expect_equal(fwithin(mtcNA, g, wdat, mean = 4.8456), wBY(mtcNA, g, wwithin, wdat, na.rm = TRUE, mean = 4.8456)) # missing weights expect_equal(fwithin(x, w = wNA, mean = 4.8456), wwithin(x, wNA, na.rm = TRUE, mean = 4.8456)) expect_equal(fwithin(x, w = wNA, na.rm = FALSE, mean = 4.8456), wwithin(x, wNA, mean = 4.8456)) expect_equal(fwithin(xNA, w = wNA, na.rm = FALSE, mean = 4.8456), wwithin(xNA, wNA, mean = 4.8456)) expect_equal(fwithin(xNA, w = wNA, mean = 4.8456), wwithin(xNA, wNA, na.rm = TRUE, mean = 4.8456)) expect_equal(qM(fwithin(mtcars, w = wdatNA, mean = 4.8456)), fwithin(m, w = wdatNA, mean = 4.8456)) expect_equal(fwithin(m, w = wdatNA, mean = 4.8456), dapply(m, wwithin, wdatNA, na.rm = TRUE, mean = 4.8456)) expect_equal(fwithin(m, w = wdatNA, na.rm = FALSE, mean = 4.8456), dapply(m, wwithin, wdatNA, mean = 4.8456)) expect_equal(fwithin(mNA, w = wdatNA, na.rm = FALSE, mean = 4.8456), dapply(mNA, wwithin, wdatNA, mean = 4.8456)) expect_equal(fwithin(mNA, w = wdatNA, mean = 4.8456), dapply(mNA, wwithin, wdatNA, na.rm = TRUE, mean = 4.8456)) expect_equal(fwithin(mtcars, w = wdatNA, mean = 4.8456), dapply(mtcars, wwithin, wdatNA, na.rm = TRUE, mean = 4.8456)) expect_equal(fwithin(mtcars, w = wdatNA, na.rm = FALSE, mean = 4.8456), dapply(mtcars, wwithin, wdatNA, mean = 4.8456)) expect_equal(fwithin(mtcNA, w = wdatNA, na.rm = FALSE, mean = 4.8456), dapply(mtcNA, wwithin, wdatNA, mean = 4.8456)) expect_equal(fwithin(mtcNA, w = wdatNA, mean = 4.8456), dapply(mtcNA, wwithin, wdatNA, na.rm = TRUE, mean = 4.8456)) expect_equal(fwithin(x, f, wNA, mean = 4.8456), wBY(x, f, wwithin, wNA, na.rm = TRUE, mean = 4.8456)) expect_equal(fwithin(x, f, wNA, na.rm = FALSE, mean = 4.8456), wBY(x, f, wwithin, wNA, mean = 4.8456)) expect_equal(fwithin(xNA, f, wNA, na.rm = FALSE, mean = 4.8456), wBY(xNA, f, wwithin, wNA, mean = 4.8456)) expect_equal(fwithin(xNA, f, wNA, mean = 4.8456), wBY(xNA, f, wwithin, wNA, na.rm = TRUE, mean = 4.8456)) expect_equal(fwithin(m, g, wdatNA, mean = 4.8456), wBY(m, g, wwithin, wdatNA, na.rm = TRUE, mean = 4.8456)) expect_equal(fwithin(m, g, wdatNA, na.rm = FALSE, mean = 4.8456), wBY(m, g, wwithin, wdatNA, mean = 4.8456)) expect_equal(fwithin(mNA, g, wdatNA, na.rm = FALSE, mean = 4.8456), wBY(mNA, g, wwithin, wdatNA, mean = 4.8456)) expect_equal(fwithin(mNA, g, wdatNA, mean = 4.8456), wBY(mNA, g, wwithin, wdatNA, na.rm = TRUE, mean = 4.8456)) expect_equal(fwithin(mtcars, g, wdatNA, mean = 4.8456), wBY(mtcars, g, wwithin, wdatNA, na.rm = TRUE, mean = 4.8456)) expect_equal(fwithin(mtcars, g, wdatNA, na.rm = FALSE, mean = 4.8456), wBY(mtcars, g, wwithin, wdatNA, mean = 4.8456)) expect_equal(fwithin(mtcNA, g, wdatNA, na.rm = FALSE, mean = 4.8456), wBY(mtcNA, g, wwithin, wdatNA, mean = 4.8456)) expect_equal(fwithin(mtcNA, g, wdatNA, mean = 4.8456), wBY(mtcNA, g, wwithin, wdatNA, na.rm = TRUE, mean = 4.8456)) }) test_that("Weighted centering on overall weighted mean performs as indended", { # complete weights expect_equal(fwithin(x, f, w, mean = "overall.mean"), as.numeric(mapply(wwithin, split(x, f), split(w, f), na.rm = TRUE)) + B(x, w = w)) expect_equal(fwithin(x, f, w, na.rm = FALSE, mean = "overall.mean"), as.numeric(mapply(wwithin, split(x, f), split(w, f))) + B(x, w = w)) # expect_equal(fwithin(xNA, f, w, na.rm = FALSE, mean = "overall.mean"), as.numeric(mapply(wwithin, split(xNA, f), split(w, f))) + B(xNA, w = w)) # Not the same !! expect_equal(fwithin(xNA, f, w, mean = "overall.mean"), as.numeric(mapply(wwithin, split(xNA, f), split(w, f), na.rm = TRUE)) + B(xNA, w = w)) }) # Do more than this to test the rest ... test_that("fwithin performs numerically stable", { expect_true(all_obj_equal(replicate(50, fwithin(1), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(NA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(NA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(x), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(x, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(xNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(xNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(m), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(m, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(mNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(mNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(mtcars), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(mtcars, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(mtcNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(mtcNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(x, f), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(x, f, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(xNA, f, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(xNA, f), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(m, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(m, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(mNA, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(mNA, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(mtcars, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(mtcars, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(mtcNA, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(mtcNA, g), simplify = FALSE))) }) test_that("fwithin with complete weights performs numerically stable", { expect_true(all_obj_equal(replicate(50, fwithin(1, w = 1), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(NA, w = 1), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(NA, w = 1, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(x, w = w), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(x, w = w, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(xNA, w = w, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(xNA, w = w), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(m, w = wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(m, w = wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(mNA, w = wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(mNA, w = wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(mtcars, w = wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(mtcars, w = wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(mtcNA, w = wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(mtcNA, w = wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(x, f, w), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(x, f, w, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(xNA, f, w, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(xNA, f, w), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(m, g, wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(m, g, wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(mNA, g, wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(mNA, g, wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(mtcars, g, wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(mtcars, g, wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(mtcNA, g, wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(mtcNA, g, wdat), simplify = FALSE))) }) test_that("fwithin with missing weights performs numerically stable", { expect_true(all_obj_equal(replicate(50, fwithin(1, w = NA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(NA, w = NA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(NA, w = NA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(x, w = wNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(x, w = wNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(xNA, w = wNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(xNA, w = wNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(m, w = wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(m, w = wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(mNA, w = wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(mNA, w = wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(mtcars, w = wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(mtcars, w = wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(mtcNA, w = wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(mtcNA, w = wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(x, f, wNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(x, f, wNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(xNA, f, wNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(xNA, f, wNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(m, g, wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(m, g, wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(mNA, g, wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(mNA, g, wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(mtcars, g, wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(mtcars, g, wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(mtcNA, g, wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(mtcNA, g, wdatNA), simplify = FALSE))) }) test_that("fwithin handles special values in the right way", { expect_equal(fwithin(NA), NA_real_) expect_equal(fwithin(NaN), NaN) expect_equal(fwithin(Inf), NaN) expect_equal(fwithin(c(Inf,Inf)), c(NaN,NaN)) expect_equal(fwithin(-Inf), NaN) expect_equal(fwithin(c(-Inf,-Inf)), c(NaN,NaN)) expect_equal(fwithin(TRUE), 0) expect_equal(fwithin(FALSE), 0) expect_equal(fwithin(NA, na.rm = FALSE), NA_real_) expect_equal(fwithin(NaN, na.rm = FALSE), NaN) expect_equal(fwithin(Inf, na.rm = FALSE), NaN) expect_equal(fwithin(c(Inf,Inf), na.rm = FALSE), c(NaN,NaN)) expect_equal(fwithin(-Inf, na.rm = FALSE), NaN) expect_equal(fwithin(c(-Inf,-Inf), na.rm = FALSE), c(NaN,NaN)) expect_equal(fwithin(TRUE, na.rm = FALSE), 0) expect_equal(fwithin(FALSE, na.rm = FALSE), 0) expect_equal(fwithin(c(1,NA)), c(0,NA_real_)) expect_equal(fwithin(c(1,NaN)), c(0,NaN)) expect_equal(fwithin(c(1,Inf)), c(-Inf,NaN)) expect_equal(fwithin(c(1,-Inf)), c(Inf,NaN)) expect_equal(fwithin(c(1,Inf), na.rm = FALSE), c(-Inf,NaN)) expect_equal(fwithin(c(1,-Inf), na.rm = FALSE), c(Inf,NaN)) expect_equal(fwithin(c(NA,-Inf), na.rm = FALSE), c(NA_real_,NA_real_)) expect_equal(fwithin(c(TRUE,TRUE), na.rm = FALSE), c(0,0)) expect_equal(fwithin(c(1,NA), na.rm = FALSE), c(NA_real_,NA_real_)) }) test_that("fwithin with weights handles special values in the right way", { expect_equal(fwithin(NA, w = 1), NA_real_) expect_equal(fwithin(NaN, w = 1), NaN) expect_equal(fwithin(Inf, w = 1), NaN) expect_equal(fwithin(c(Inf,Inf), w = 1:2), c(NaN,NaN)) expect_equal(fwithin(-Inf, w = 1), NaN) expect_equal(fwithin(c(-Inf,-Inf), w = 1:2), c(NaN,NaN)) expect_equal(fwithin(TRUE, w = 1), 0) expect_equal(fwithin(FALSE, w = 1), 0) expect_equal(fwithin(NA, w = 1, na.rm = FALSE), NA_real_) expect_equal(fwithin(NaN, w = 1, na.rm = FALSE), NaN) expect_equal(fwithin(Inf, w = 1, na.rm = FALSE), NaN) expect_equal(fwithin(c(Inf,Inf), w = 1:2, na.rm = FALSE), c(NaN,NaN)) expect_equal(fwithin(-Inf, w = 1, na.rm = FALSE), NaN) expect_equal(fwithin(c(-Inf,-Inf), w = 1:2, na.rm = FALSE), c(NaN,NaN)) expect_equal(fwithin(TRUE, w = 1, na.rm = FALSE), 0) expect_equal(fwithin(FALSE, w = 1, na.rm = FALSE), 0) expect_equal(fwithin(c(1,NA), w = 1:2), c(0,NA)) expect_equal(fwithin(c(1,NaN), w = 1:2), c(0,NaN)) expect_equal(fwithin(c(1,Inf), w = 1:2), c(-Inf,NaN)) expect_equal(fwithin(c(1,-Inf), w = 1:2), c(Inf,NaN)) expect_equal(fwithin(c(1,Inf), w = 1:2, na.rm = FALSE), c(-Inf,NaN)) expect_equal(fwithin(c(1,-Inf), w = 1:2, na.rm = FALSE), c(Inf,NaN)) expect_equal(fwithin(c(NA,-Inf), w = 1:2, na.rm = FALSE), c(NA_real_,NA_real_)) expect_equal(fwithin(c(FALSE,FALSE), w = 1:2, na.rm = FALSE), c(0,0)) expect_equal(fwithin(c(1,NA), w = 1:2, na.rm = FALSE), c(NA_real_,NA_real_)) expect_equal(fwithin(1:3, w = c(1,Inf,3)), c(NaN,NaN,NaN)) expect_equal(fwithin(1:3, w = c(1,-Inf,3)), c(NaN,NaN,NaN)) expect_equal(fwithin(1:3, w = c(1,Inf,3), na.rm = FALSE), c(NaN,NaN,NaN)) expect_equal(fwithin(1:3, w = c(1,-Inf,3), na.rm = FALSE), c(NaN,NaN,NaN)) expect_equal(fwithin(NA, w = NA), NA_real_) expect_equal(fwithin(NaN, w = NA), NaN) expect_equal(fwithin(Inf, w = NA), NaN) expect_equal(fwithin(c(Inf,Inf), w = c(NA,2)), c(NaN,NaN)) expect_equal(fwithin(-Inf, w = NA), NA_real_) expect_equal(fwithin(c(-Inf,-Inf), w = c(NA,2)), c(NaN,NaN)) expect_equal(fwithin(TRUE, w = NA), NA_real_) expect_equal(fwithin(FALSE, w = NA), NA_real_) expect_equal(fwithin(NA, w = NA, na.rm = FALSE), NA_real_) expect_equal(fwithin(NaN, w = NA, na.rm = FALSE), NA_real_) expect_equal(fwithin(Inf, w = NA, na.rm = FALSE), NA_real_) expect_equal(fwithin(c(Inf,Inf), w = c(NA,2), na.rm = FALSE), c(NA_real_,NA_real_)) expect_equal(fwithin(-Inf, w = NA, na.rm = FALSE), NA_real_) expect_equal(fwithin(c(-Inf,-Inf), w = c(NA,2), na.rm = FALSE), c(NA_real_,NA_real_)) expect_equal(fwithin(TRUE, w = NA, na.rm = FALSE), NA_real_) expect_equal(fwithin(FALSE, w = NA, na.rm = FALSE), NA_real_) expect_equal(fwithin(c(1,NA), w = c(NA,2)), c(NA_real_,NA_real_)) expect_equal(fwithin(c(1,NaN), w = c(NA,2)), c(NA_real_,NA_real_)) expect_equal(fwithin(c(1,Inf), w = c(NA,2)), c(-Inf,NaN)) expect_equal(fwithin(c(1,-Inf), w = c(NA,2)), c(Inf,NaN)) expect_equal(fwithin(c(1,Inf), w = c(NA,2), na.rm = FALSE), c(NA_real_,NA_real_)) expect_equal(fwithin(c(1,-Inf), w = c(NA,2), na.rm = FALSE), c(NA_real_,NA_real_)) expect_equal(fwithin(c(NA,-Inf), w = c(NA,2), na.rm = FALSE), c(NA_real_,NA_real_)) expect_equal(fwithin(c(FALSE,FALSE), w = c(NA,2), na.rm = FALSE), c(NA_real_,NA_real_)) expect_equal(fwithin(c(1,NA), w = c(NA,2), na.rm = FALSE), c(NA_real_,NA_real_)) expect_equal(fwithin(1:3, w = c(NA,Inf,3)), c(NaN,NaN,NaN)) expect_equal(fwithin(1:3, w = c(NA,-Inf,3)), c(NaN,NaN,NaN)) expect_equal(fwithin(1:3, w = c(NA,Inf,3), na.rm = FALSE), c(NA_real_,NA_real_,NA_real_)) expect_equal(fwithin(1:3, w = c(NA,-Inf,3), na.rm = FALSE), c(NA_real_,NA_real_,NA_real_)) }) test_that("fwithin produces errors for wrong input", { expect_error(fwithin("a")) expect_error(fwithin(NA_character_)) expect_error(fwithin(mNAc)) expect_error(fwithin(mNAc, f)) expect_error(fwithin(1:2,1:3)) expect_error(fwithin(m,1:31)) expect_error(fwithin(mtcars,1:31)) expect_error(fwithin(mtcars, w = 1:31)) expect_error(fwithin("a", w = 1)) expect_error(fwithin(1:2, w = 1:3)) expect_error(fwithin(NA_character_, w = 1)) expect_error(fwithin(mNAc, w = wdat)) expect_error(fwithin(mNAc, f, wdat)) expect_error(fwithin(mNA, w = 1:33)) expect_error(fwithin(1:2,1:2, 1:3)) expect_error(fwithin(m,1:32,1:20)) expect_error(fwithin(mtcars,1:32,1:10)) expect_error(fwithin(1:2, w = c("a","b"))) expect_error(fwithin(wlddev)) expect_error(fwithin(wlddev, w = wlddev$year)) expect_error(fwithin(wlddev, wlddev$iso3c)) expect_error(fwithin(wlddev, wlddev$iso3c, wlddev$year)) }) test_that("fwithin shoots errors for wrong input to mean", { expect_error(fwithin(x, mean = FALSE)) expect_error(fwithin(m, mean = FALSE)) expect_error(fwithin(mtcars, mean = FALSE)) expect_error(fwithin(x, mean = "overall.mean")) expect_error(fwithin(m, mean = "overall.mean")) expect_error(fwithin(mtcars, mean = "overall.mean")) expect_error(fwithin(m, mean = fmean(m))) expect_error(fwithin(mtcars, mean = fmean(mtcars))) }) # W test_that("W produces errors for wrong input", { expect_error(W("a")) expect_error(W(NA_character_)) expect_error(W(mNAc)) expect_error(W(mNAc, f)) expect_error(W(1:2,1:3)) expect_error(W(m,1:31)) expect_error(W(mtcars,1:31)) expect_error(W(mtcars, w = 1:31)) expect_error(W("a", w = 1)) expect_error(W(1:2, w = c("a","b"))) expect_error(W(1:2, w = 1:3)) expect_error(W(NA_character_, w = 1)) expect_error(W(mNAc, w = wdat)) expect_error(W(mNAc, f, wdat)) expect_error(W(mNA, w = 1:33)) expect_error(W(mtcNA, w = 1:33)) expect_error(W(1:2,1:2, 1:3)) expect_error(W(m,1:32,1:20)) expect_error(W(mtcars,1:32,1:10)) expect_error(W(1:2, 1:3, 1:2)) expect_error(W(m,1:31,1:32)) expect_error(W(mtcars,1:33,1:32)) }) test_that("W.data.frame method is foolproof", { expect_visible(W(wlddev)) expect_visible(W(wlddev, w = wlddev$year)) expect_visible(W(wlddev, w = ~year)) expect_visible(W(wlddev, wlddev$iso3c)) expect_visible(W(wlddev, ~iso3c)) expect_visible(W(wlddev, ~iso3c + region)) expect_visible(W(wlddev, wlddev$iso3c, wlddev$year)) expect_visible(W(wlddev, ~iso3c, ~year)) expect_visible(W(wlddev, cols = 9:12)) expect_visible(W(wlddev, w = wlddev$year, cols = 9:12)) expect_visible(W(wlddev, w = ~year, cols = 9:12)) expect_visible(W(wlddev, wlddev$iso3c, cols = 9:12)) expect_visible(W(wlddev, ~iso3c, cols = 9:12)) expect_visible(W(wlddev, wlddev$iso3c, wlddev$year, cols = 9:12)) expect_visible(W(wlddev, ~iso3c, ~year, cols = 9:12)) expect_visible(W(wlddev, cols = c("PCGDP","LIFEEX"))) expect_visible(W(wlddev, w = wlddev$year, cols = c("PCGDP","LIFEEX"))) expect_visible(W(wlddev, w = ~year, cols = c("PCGDP","LIFEEX"))) expect_visible(W(wlddev, wlddev$iso3c, cols = c("PCGDP","LIFEEX"))) expect_visible(W(wlddev, ~iso3c, cols = c("PCGDP","LIFEEX"))) expect_visible(W(wlddev, wlddev$iso3c, wlddev$year, cols = c("PCGDP","LIFEEX"))) expect_visible(W(wlddev, ~iso3c, ~year, cols = c("PCGDP","LIFEEX"))) expect_error(W(wlddev, cols = NULL)) expect_error(W(wlddev, w = wlddev$year, cols = NULL)) expect_error(W(wlddev, w = ~year, cols = NULL)) expect_error(W(wlddev, wlddev$iso3c, cols = NULL)) expect_error(W(wlddev, ~iso3c, cols = NULL)) expect_error(W(wlddev, wlddev$iso3c, wlddev$year, cols = NULL)) expect_error(W(wlddev, ~iso3c, ~year, cols = NULL)) expect_error(W(wlddev, cols = 9:14)) expect_error(W(wlddev, w = wlddev$year, cols = 9:14)) expect_error(W(wlddev, w = ~year, cols = 9:14)) expect_error(W(wlddev, wlddev$iso3c, cols = 9:14)) expect_error(W(wlddev, ~iso3c, cols = 9:14)) expect_error(W(wlddev, wlddev$iso3c, wlddev$year, cols = 9:14)) expect_error(W(wlddev, ~iso3c, ~year, cols = 9:14)) expect_error(W(wlddev, cols = c("PCGDP","LIFEEX","bla"))) expect_error(W(wlddev, w = wlddev$year, cols = c("PCGDP","LIFEEX","bla"))) expect_error(W(wlddev, w = ~year, cols = c("PCGDP","LIFEEX","bla"))) expect_error(W(wlddev, wlddev$iso3c, cols = c("PCGDP","LIFEEX","bla"))) expect_error(W(wlddev, ~iso3c, cols = c("PCGDP","LIFEEX","bla"))) expect_error(W(wlddev, wlddev$iso3c, wlddev$year, cols = c("PCGDP","LIFEEX","bla"))) expect_error(W(wlddev, ~iso3c, ~year, cols = c("PCGDP","LIFEEX","bla"))) expect_error(W(wlddev, w = mtcars)) expect_error(W(wlddev, w = 4)) expect_error(W(wlddev, w = "year")) expect_error(W(wlddev, w = ~year2)) suppressWarnings(expect_error(W(wlddev, w = ~year + region))) expect_error(W(wlddev, mtcars)) expect_error(W(wlddev, 2)) expect_error(W(wlddev, "iso3c")) expect_error(W(wlddev, ~iso3c2)) expect_error(W(wlddev, ~iso3c + bla)) expect_error(W(wlddev, mtcars$mpg, mtcars$cyl)) expect_error(W(wlddev, 2, 4)) expect_error(W(wlddev, ~iso3c2, ~year2)) expect_error(W(wlddev, cols = ~bla)) expect_error(W(wlddev, w = ~bla, cols = 9:12)) expect_error(W(wlddev, w = 4, cols = 9:12)) expect_error(W(wlddev, w = "year", cols = 9:12)) expect_error(W(wlddev, w = ~yewar, cols = 9:12)) expect_error(W(wlddev, mtcars$mpg, cols = 9:12)) expect_error(W(wlddev, ~iso3c + ss, cols = 9:12)) expect_error(W(wlddev, 2, cols = 9:12)) expect_error(W(wlddev, "iso3c", cols = 9:12)) expect_error(W(wlddev, wlddev$iso3c, ~year + bla, cols = 9:12)) expect_error(W(wlddev, ~iso3c3, ~year, cols = 9:12)) expect_error(W(wlddev, cols = c("PC3GDP","LIFEEX"))) }) collapse/tests/testthat/test-setop.R0000644000176200001440000001574314162170342017322 0ustar liggesuserscontext("setop") d <- mtcars$mpg dc <- copyv(d, 0, 0) i <- as.integer(mtcars$cyl) ic <- copyv(i, 0, 0) dm <- as.matrix(mtcars) + 1 dmc <- copyv(dm, 0, 0) im <- dm storage.mode(im) <- "integer" imc <- copyv(im, 0, 0) dr <- dm[nrow(dm), ] ir <- im[nrow(im), ] ddf <- mtcars %c+% 1 idf <- dapply(ddf, as.integer) ddfc <- copyv(ddf, 0, 0) idfc <- copyv(idf, 0, 0) ops <- c("+", "-", "*", "/") test_that("setop works in scalar-vector operations", { expect_equal(i %+=% 2 %-=% 2, ic) expect_equal(i %+=% 2L %-=% 2L, ic) expect_equal(i %*=% 2 %/=% 2, ic) expect_equal(i %*=% 2L %/=% 2L, ic) expect_equal(d %+=% 2 %-=% 2, dc) expect_equal(d %+=% 2L %-=% 2L, dc) expect_equal(d %*=% 2 %/=% 2, dc) expect_equal(d %*=% 2L %/=% 2L, dc) expect_equal(i %+=% dc %-=% trunc(dc), ic) # Problem: The computation creates a decimal which is then rounded down... expect_equal(i %+=% ic %-=% ic, ic) expect_equal(i %*=% dc %/=% trunc(dc), ic) expect_equal(i %*=% ic %/=% ic, ic) expect_equal(d %+=% dc %-=% dc, dc) expect_equal(d %+=% ic %-=% ic, dc) expect_equal(d %*=% dc %/=% dc, dc) expect_equal(d %*=% ic %/=% ic, dc) expect_identical(i, ic) expect_equal(d, dc) # Same with setop function for(o in ops) setop(i, o, 2); expect_identical(i, ic) for(o in ops) setop(d, o, 2); expect_equal(d, dc) for(o in ops) setop(i, o, 2L); expect_identical(i, ic) for(o in ops) setop(d, o, 2L); expect_equal(d, dc) for(o in ops) setop(i, o, trunc(dc)); expect_identical(i, ic) for(o in ops) setop(d, o, dc); expect_equal(d, dc) for(o in ops) setop(i, o, ic); expect_identical(i, ic) for(o in ops) setop(d, o, ic); expect_equal(d, dc) }) test_that("setop works in scalar-vector-matrix operations", { # Matrix & Scalar expect_equal(im %+=% 2 %-=% 2, imc) expect_equal(im %+=% 2L %-=% 2L, imc) expect_equal(im %*=% 2 %/=% 2, imc) expect_equal(im %*=% 2L %/=% 2L, imc) expect_equal(dm %+=% 2 %-=% 2, dmc) expect_equal(dm %+=% 2L %-=% 2L, dmc) expect_equal(dm %*=% 2 %/=% 2, dmc) expect_equal(dm %*=% 2L %/=% 2L, dmc) # Matrix & Vector expect_equal(im %+=% trunc(dc) %-=% trunc(dc), imc) expect_equal(im %+=% ic %-=% ic, imc) expect_equal(im %*=% trunc(dc) %/=% trunc(dc), imc) expect_equal(im %*=% ic %/=% ic, imc) expect_equal(dm %+=% dc %-=% dc, dmc) expect_equal(dm %+=% ic %-=% ic, dmc) expect_equal(dm %*=% dc %/=% dc, dmc) expect_equal(dm %*=% ic %/=% ic, dmc) # Matrix & Matrix expect_equal(im %+=% trunc(dmc) %-=% trunc(dmc), imc) expect_equal(im %+=% imc %-=% imc, imc) expect_equal(im %*=% trunc(dmc) %/=% trunc(dmc), imc) expect_equal(im %*=% imc %/=% imc, imc) expect_equal(dm %+=% dmc %-=% dmc, dmc) expect_equal(dm %+=% imc %-=% imc, dmc) expect_equal(dm %*=% dmc %/=% dmc, dmc) expect_equal(dm %*=% imc %/=% imc, dmc) expect_identical(im, imc) expect_equal(dm, dmc) # Same with setop function # Matrix & Scalar for(o in ops) setop(im, o, 2); expect_identical(im, imc) for(o in ops) setop(dm, o, 2); expect_equal(dm, dmc) for(o in ops) setop(im, o, 2L); expect_identical(im, imc) for(o in ops) setop(dm, o, 2L); expect_equal(dm, dmc) # Matrix & Vector for(o in ops) setop(im, o, trunc(dc)); expect_identical(im, imc) for(o in ops) setop(dm, o, dc); expect_equal(dm, dmc) for(o in ops) setop(im, o, ic); expect_identical(im, imc) for(o in ops) setop(dm, o, ic); expect_equal(dm, dmc) # Matrix & Matrix for(o in ops) setop(im, o, trunc(dmc)); expect_identical(im, imc) for(o in ops) setop(dm, o, dmc); expect_equal(dm, dmc) for(o in ops) setop(im, o, imc); expect_identical(im, imc) for(o in ops) setop(dm, o, imc); expect_equal(dm, dmc) # Row-wise Matrix & Vector for(o in ops) setop(im, o, trunc(dr), rowwise = TRUE); expect_identical(im, imc) for(o in ops) setop(dm, o, dr, rowwise = TRUE); expect_equal(dm, dmc) for(o in ops) setop(im, o, ir, rowwise = TRUE); expect_identical(im, imc) for(o in ops) setop(dm, o, ir, rowwise = TRUE); expect_equal(dm, dmc) # Comparison with TRA (only for doubles) for(o in ops) { expect_equal(setop(dm, o, dr, rowwise = TRUE), TRA(dmc, dr, o)) dm <- data.table::copy(dmc) expect_equal(setop(dm, o, ir, rowwise = TRUE), TRA(dmc, ir, o)) dm <- data.table::copy(dmc) } }) test_that("setop works in operations involving data frames", { # DF & Scalar expect_equal(idf %+=% 2 %-=% 2, idfc) expect_equal(idf %+=% 2L %-=% 2L, idfc) expect_equal(idf %*=% 2 %/=% 2, idfc) expect_equal(idf %*=% 2L %/=% 2L, idfc) expect_equal(ddf %+=% 2 %-=% 2, ddfc) expect_equal(ddf %+=% 2L %-=% 2L, ddfc) expect_equal(ddf %*=% 2 %/=% 2, ddfc) expect_equal(ddf %*=% 2L %/=% 2L, ddfc) # DF & Vector expect_equal(idf %+=% trunc(dc) %-=% trunc(dc), idfc) expect_equal(idf %+=% ic %-=% ic, idfc) expect_equal(idf %*=% trunc(dc) %/=% trunc(dc), idfc) expect_equal(idf %*=% ic %/=% ic, idfc) expect_equal(ddf %+=% dc %-=% dc, ddfc) expect_equal(ddf %+=% ic %-=% ic, ddfc) expect_equal(ddf %*=% dc %/=% dc, ddfc) expect_equal(ddf %*=% ic %/=% ic, ddfc) # DF & DF expect_equal(idf %+=% trunc(ddfc) %-=% trunc(ddfc), idfc) expect_equal(idf %+=% idfc %-=% idfc, idfc) expect_equal(idf %*=% trunc(ddfc) %/=% trunc(ddfc), idfc) expect_equal(idf %*=% idfc %/=% idfc, idfc) expect_equal(ddf %+=% ddfc %-=% ddfc, ddfc) expect_equal(ddf %+=% idfc %-=% idfc, ddfc) expect_equal(ddf %*=% ddfc %/=% ddfc, ddfc) expect_equal(ddf %*=% idfc %/=% idfc, ddfc) expect_identical(idf, idfc) expect_equal(ddf, ddfc) # Same with setop function # DF & Scalar for(o in ops) setop(idf, o, 2); expect_identical(idf, idfc) for(o in ops) setop(ddf, o, 2); expect_equal(ddf, ddfc) for(o in ops) setop(idf, o, 2L); expect_identical(idf, idfc) for(o in ops) setop(ddf, o, 2L); expect_equal(ddf, ddfc) # DF & Vector for(o in ops) setop(idf, o, trunc(dc)); expect_identical(idf, idfc) for(o in ops) setop(ddf, o, dc); expect_equal(ddf, ddfc) for(o in ops) setop(idf, o, ic); expect_identical(idf, idfc) for(o in ops) setop(ddf, o, ic); expect_equal(ddf, ddfc) # DF & DF for(o in ops) setop(idf, o, trunc(ddfc)); expect_identical(idf, idfc) for(o in ops) setop(ddf, o, ddfc); expect_equal(ddf, ddfc) for(o in ops) setop(idf, o, idfc); expect_identical(idf, idfc) for(o in ops) setop(ddf, o, idfc); expect_equal(ddf, ddfc) # Row-wise DF & Vector for(o in ops) setop(idf, o, trunc(dr), rowwise = TRUE); expect_identical(idf, idfc) for(o in ops) setop(ddf, o, dr, rowwise = TRUE); expect_equal(ddf, ddfc) for(o in ops) setop(idf, o, ir, rowwise = TRUE); expect_identical(idf, idfc) for(o in ops) setop(ddf, o, ir, rowwise = TRUE); expect_equal(ddf, ddfc) # Comparison with TRA (only for doubles) for(o in ops) { expect_equal(setop(ddf, o, dr, rowwise = TRUE), TRA(ddfc, dr, o)) ddf <- data.table::copy(ddfc) expect_equal(setop(ddf, o, ir, rowwise = TRUE), TRA(ddfc, ir, o)) ddf <- data.table::copy(ddfc) } }) collapse/tests/testthat/test-fprod.R0000644000176200001440000004515014167346451017310 0ustar liggesuserscontext("fprod") bprod <- base::prod # rm(list = ls()) set.seed(101) x <- rnorm(100) w <- abs(100*rnorm(100)) wdat <- abs(100*rnorm(32)) xNA <- x wNA <- w wdatNA <- wdat xNA[sample.int(100,20)] <- NA wNA[sample.int(100,20)] <- NA wdatNA[sample.int(32, 5)] <- NA f <- as.factor(sample.int(10, 100, TRUE)) g <- GRP(mtcars, ~ cyl + vs + am) gf <- as_factor_GRP(g) mtcNA <- na_insert(mtcars) mtcNA[27,1] <- NA # single group NA !! m <- as.matrix(mtcars) mNA <- as.matrix(mtcNA) mNAc <- mNA storage.mode(mNAc) <- "character" na21 <- function(x) { x[is.na(x)] <- 1 x } wprod <- function(x, w, na.rm = FALSE) { if(na.rm) { cc <- complete.cases(x, w) if(!any(cc)) return(NA_real_) x <- x[cc] w <- w[cc] } bprod(x*w) } wBY <- function(x, f, FUN, w, ...) { if(is.atomic(x) && !is.array(x)) return(mapply(FUN, split(x, f), split(w, f), ...)) wspl <- split(w, f) if(is.atomic(x)) return(dapply(x, function(xi) mapply(FUN, split(xi, f), wspl, ...))) qDF(dapply(x, function(xi) mapply(FUN, split(xi, f), wspl, ...), return = "matrix")) } test_that("fprod performs like base::prod", { expect_equal(fprod(NA), as.double(bprod(NA))) expect_equal(fprod(NA, na.rm = FALSE), as.double(bprod(NA))) expect_equal(fprod(1), bprod(1, na.rm = TRUE)) expect_equal(fprod(1:3), bprod(1:3, na.rm = TRUE)) expect_equal(fprod(-1:1), bprod(-1:1, na.rm = TRUE)) expect_equal(fprod(1, na.rm = FALSE), bprod(1)) expect_equal(fprod(1:3, na.rm = FALSE), bprod(1:3)) expect_equal(fprod(-1:1, na.rm = FALSE), bprod(-1:1)) expect_equal(fprod(x), bprod(x, na.rm = TRUE)) expect_equal(fprod(x, na.rm = FALSE), bprod(x)) expect_equal(fprod(xNA, na.rm = FALSE), bprod(xNA)) expect_equal(fprod(xNA), bprod(xNA, na.rm = TRUE)) expect_equal(fprod(mtcars), fprod(m)) expect_equal(fprod(m), dapply(m, bprod, na.rm = TRUE)) expect_equal(fprod(m, na.rm = FALSE), dapply(m, bprod)) expect_equal(fprod(mNA, na.rm = FALSE), dapply(mNA, bprod)) expect_equal(fprod(mNA), dapply(mNA, bprod, na.rm = TRUE)) expect_equal(fprod(mtcars), dapply(mtcars, bprod, na.rm = TRUE)) expect_equal(fprod(mtcars, na.rm = FALSE), dapply(mtcars, bprod)) expect_equal(fprod(mtcNA, na.rm = FALSE), dapply(mtcNA, bprod)) expect_equal(fprod(mtcNA), dapply(mtcNA, bprod, na.rm = TRUE)) expect_equal(fprod(x, f), BY(x, f, bprod, na.rm = TRUE)) expect_equal(fprod(x, f, na.rm = FALSE), BY(x, f, bprod)) expect_equal(fprod(xNA, f, na.rm = FALSE), BY(xNA, f, bprod)) expect_equal(na21(fprod(xNA, f)), BY(xNA, f, bprod, na.rm = TRUE)) expect_equal(fprod(m, g), BY(m, g, bprod, na.rm = TRUE)) expect_equal(fprod(m, g, na.rm = FALSE), BY(m, g, bprod)) expect_equal(fprod(mNA, g, na.rm = FALSE), BY(mNA, g, bprod)) expect_equal(na21(fprod(mNA, g)), BY(mNA, g, bprod, na.rm = TRUE)) # bprod(NA, na.rm = TRUE) gives 1 expect_equal(fprod(mtcars, g), BY(mtcars, g, bprod, na.rm = TRUE)) expect_equal(fprod(mtcars, g, na.rm = FALSE), BY(mtcars, g, bprod)) expect_equal(fprod(mtcNA, g, na.rm = FALSE), BY(mtcNA, g, bprod)) expect_equal(na21(fprod(mtcNA, g)), BY(mtcNA, g, bprod, na.rm = TRUE)) # bprod(NA, na.rm = TRUE) gives 1 }) test_that("fprod with weights performs like wprod (defined above)", { # complete weights expect_equal(fprod(NA, w = 1), wprod(NA, 1)) expect_equal(fprod(NA, w = 1, na.rm = FALSE), wprod(NA, 1)) expect_equal(fprod(1, w = 1), wprod(1, w = 1)) expect_equal(fprod(1:3, w = 1:3), wprod(1:3, 1:3)) expect_equal(fprod(-1:1, w = 1:3), wprod(-1:1, 1:3)) expect_equal(fprod(1, w = 1, na.rm = FALSE), wprod(1, 1)) expect_equal(fprod(1:3, w = c(0.99,3454,1.111), na.rm = FALSE), wprod(1:3, c(0.99,3454,1.111))) expect_equal(fprod(-1:1, w = 1:3, na.rm = FALSE), wprod(-1:1, 1:3)) expect_equal(fprod(x, w = w), wprod(x, w)) expect_equal(fprod(x, w = w, na.rm = FALSE), wprod(x, w)) expect_equal(fprod(xNA, w = w, na.rm = FALSE), wprod(xNA, w)) expect_equal(fprod(xNA, w = w), wprod(xNA, w, na.rm = TRUE)) expect_equal(fprod(mtcars, w = wdat), fprod(m, w = wdat)) expect_equal(fprod(m, w = wdat), dapply(m, wprod, wdat, na.rm = TRUE)) expect_equal(fprod(m, w = wdat, na.rm = FALSE), dapply(m, wprod, wdat)) expect_equal(fprod(mNA, w = wdat, na.rm = FALSE), dapply(mNA, wprod, wdat)) expect_equal(fprod(mNA, w = wdat), dapply(mNA, wprod, wdat, na.rm = TRUE)) expect_equal(fprod(mtcars, w = wdat), dapply(mtcars, wprod, wdat, na.rm = TRUE)) expect_equal(fprod(mtcars, w = wdat, na.rm = FALSE), dapply(mtcars, wprod, wdat)) expect_equal(fprod(mtcNA, w = wdat, na.rm = FALSE), dapply(mtcNA, wprod, wdat)) expect_equal(fprod(mtcNA, w = wdat), dapply(mtcNA, wprod, wdat, na.rm = TRUE)) expect_equal(fprod(x, f, w), wBY(x, f, wprod, w)) expect_equal(fprod(x, f, w, na.rm = FALSE), wBY(x, f, wprod, w)) expect_equal(fprod(xNA, f, w, na.rm = FALSE), wBY(xNA, f, wprod, w)) expect_equal(fprod(xNA, f, w), wBY(xNA, f, wprod, w, na.rm = TRUE)) expect_equal(fprod(m, g, wdat), wBY(m, gf, wprod, wdat)) expect_equal(fprod(m, g, wdat, na.rm = FALSE), wBY(m, gf, wprod, wdat)) expect_equal(fprod(mNA, g, wdat, na.rm = FALSE), wBY(mNA, gf, wprod, wdat)) expect_equal(fprod(mNA, g, wdat), wBY(mNA, gf, wprod, wdat, na.rm = TRUE)) expect_equal(fprod(mtcars, g, wdat), wBY(mtcars, gf, wprod, wdat)) expect_equal(fprod(mtcars, g, wdat, na.rm = FALSE), wBY(mtcars, gf, wprod, wdat)) expect_equal(fprod(mtcNA, g, wdat, na.rm = FALSE), wBY(mtcNA, gf, wprod, wdat)) expect_equal(fprod(mtcNA, g, wdat), wBY(mtcNA, gf, wprod, wdat, na.rm = TRUE)) # missing weights expect_equal(fprod(NA, w = NA), wprod(NA, NA)) expect_equal(fprod(NA, w = NA, na.rm = FALSE), wprod(NA, NA)) expect_equal(fprod(1, w = NA), wprod(1, w = NA)) expect_equal(fprod(1:3, w = c(NA,1:2)), wprod(1:3, c(NA,1:2), na.rm = TRUE)) expect_equal(fprod(-1:1, w = c(NA,1:2)), wprod(-1:1, c(NA,1:2), na.rm = TRUE)) expect_equal(fprod(1, w = NA, na.rm = FALSE), wprod(1, NA)) expect_equal(fprod(1:3, w = c(NA,1:2), na.rm = FALSE), wprod(1:3, c(NA,1:2))) expect_equal(fprod(-1:1, w = c(NA,1:2), na.rm = FALSE), wprod(-1:1, c(NA,1:2))) expect_equal(fprod(x, w = wNA), wprod(x, wNA, na.rm = TRUE)) expect_equal(fprod(x, w = wNA, na.rm = FALSE), wprod(x, wNA)) expect_equal(fprod(xNA, w = wNA, na.rm = FALSE), wprod(xNA, wNA)) expect_equal(fprod(xNA, w = wNA), wprod(xNA, wNA, na.rm = TRUE)) expect_equal(fprod(mtcars, w = wdatNA), fprod(m, w = wdatNA)) expect_equal(fprod(m, w = wdatNA), dapply(m, wprod, wdatNA, na.rm = TRUE)) expect_equal(fprod(m, w = wdatNA, na.rm = FALSE), dapply(m, wprod, wdatNA)) expect_equal(fprod(mNA, w = wdatNA, na.rm = FALSE), dapply(mNA, wprod, wdatNA)) expect_equal(fprod(mNA, w = wdatNA), dapply(mNA, wprod, wdatNA, na.rm = TRUE)) expect_equal(fprod(mtcars, w = wdatNA), dapply(mtcars, wprod, wdatNA, na.rm = TRUE)) expect_equal(fprod(mtcars, w = wdatNA, na.rm = FALSE), dapply(mtcars, wprod, wdatNA)) expect_equal(fprod(mtcNA, w = wdatNA, na.rm = FALSE), dapply(mtcNA, wprod, wdatNA)) expect_equal(fprod(mtcNA, w = wdatNA), dapply(mtcNA, wprod, wdatNA, na.rm = TRUE)) expect_equal(fprod(x, f, wNA), wBY(x, f, wprod, wNA, na.rm = TRUE)) expect_equal(fprod(x, f, wNA, na.rm = FALSE), wBY(x, f, wprod, wNA)) expect_equal(fprod(xNA, f, wNA, na.rm = FALSE), wBY(xNA, f, wprod, wNA)) expect_equal(fprod(xNA, f, wNA), wBY(xNA, f, wprod, wNA, na.rm = TRUE)) expect_equal(fprod(m, g, wdatNA), wBY(m, gf, wprod, wdatNA, na.rm = TRUE)) expect_equal(fprod(m, g, wdatNA, na.rm = FALSE), wBY(m, gf, wprod, wdatNA)) expect_equal(fprod(mNA, g, wdatNA, na.rm = FALSE), wBY(mNA, gf, wprod, wdatNA)) expect_equal(fprod(mNA, g, wdatNA), wBY(mNA, gf, wprod, wdatNA, na.rm = TRUE)) expect_equal(fprod(mtcars, g, wdatNA), wBY(mtcars, gf, wprod, wdatNA, na.rm = TRUE)) expect_equal(fprod(mtcars, g, wdatNA, na.rm = FALSE), wBY(mtcars, gf, wprod, wdatNA)) expect_equal(fprod(mtcNA, g, wdatNA, na.rm = FALSE), wBY(mtcNA, gf, wprod, wdatNA)) expect_equal(fprod(mtcNA, g, wdatNA), wBY(mtcNA, gf, wprod, wdatNA, na.rm = TRUE)) }) test_that("fprod performs numerically stable", { expect_true(all_obj_equal(replicate(50, fprod(1), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(NA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(NA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(x), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(x, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(xNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(xNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(m), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(m, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(mNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(mNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(mtcars), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(mtcars, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(mtcNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(mtcNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(x, f), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(x, f, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(xNA, f, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(xNA, f), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(m, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(m, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(mNA, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(mNA, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(mtcars, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(mtcars, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(mtcNA, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(mtcNA, g), simplify = FALSE))) }) test_that("fprod with complete weights performs numerically stable", { expect_true(all_obj_equal(replicate(50, fprod(1, w = 1), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(NA, w = 1), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(NA, w = 1, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(x, w = w), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(x, w = w, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(xNA, w = w, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(xNA, w = w), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(m, w = wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(m, w = wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(mNA, w = wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(mNA, w = wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(mtcars, w = wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(mtcars, w = wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(mtcNA, w = wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(mtcNA, w = wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(x, f, w), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(x, f, w, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(xNA, f, w, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(xNA, f, w), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(m, g, wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(m, g, wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(mNA, g, wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(mNA, g, wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(mtcars, g, wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(mtcars, g, wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(mtcNA, g, wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(mtcNA, g, wdat), simplify = FALSE))) }) test_that("fprod with missing weights performs numerically stable", { expect_true(all_obj_equal(replicate(50, fprod(1, w = NA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(NA, w = NA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(NA, w = NA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(x, w = wNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(x, w = wNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(xNA, w = wNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(xNA, w = wNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(m, w = wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(m, w = wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(mNA, w = wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(mNA, w = wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(mtcars, w = wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(mtcars, w = wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(mtcNA, w = wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(mtcNA, w = wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(x, f, wNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(x, f, wNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(xNA, f, wNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(xNA, f, wNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(m, g, wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(m, g, wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(mNA, g, wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(mNA, g, wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(mtcars, g, wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(mtcars, g, wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(mtcNA, g, wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(mtcNA, g, wdatNA), simplify = FALSE))) }) test_that("fprod handles special values in the right way", { expect_equal(fprod(NA), NA_real_) expect_equal(fprod(NaN), NaN) expect_equal(fprod(Inf), Inf) expect_equal(fprod(-Inf), -Inf) expect_equal(fprod(TRUE), 1) expect_equal(fprod(FALSE), 0) expect_equal(fprod(NA, na.rm = FALSE), NA_real_) expect_equal(fprod(NaN, na.rm = FALSE), NaN) expect_equal(fprod(Inf, na.rm = FALSE), Inf) expect_equal(fprod(-Inf, na.rm = FALSE), -Inf) expect_equal(fprod(TRUE, na.rm = FALSE), 1) expect_equal(fprod(FALSE, na.rm = FALSE), 0) expect_equal(fprod(c(1,NA)), 1) expect_equal(fprod(c(1,NaN)), 1) expect_equal(fprod(c(1,Inf)), Inf) expect_equal(fprod(c(1,-Inf)), -Inf) expect_equal(fprod(c(FALSE,TRUE)), 0) expect_equal(fprod(c(TRUE,TRUE)), 1) expect_equal(fprod(c(1,Inf), na.rm = FALSE), Inf) expect_equal(fprod(c(1,-Inf), na.rm = FALSE), -Inf) expect_equal(fprod(c(FALSE,TRUE), na.rm = FALSE), 0) expect_equal(fprod(c(TRUE,TRUE), na.rm = FALSE), 1) }) test_that("fprod with weights handles special values in the right way", { expect_equal(fprod(NA, w = 1), NA_real_) expect_equal(fprod(NaN, w = 1), NaN) expect_equal(fprod(Inf, w = 1), Inf) expect_equal(fprod(-Inf, w = 1), -Inf) expect_equal(fprod(TRUE, w = 1), 1) expect_equal(fprod(FALSE, w = 1), 0) expect_equal(fprod(NA, w = 1, na.rm = FALSE), NA_real_) expect_equal(fprod(NaN, w = 1, na.rm = FALSE), NaN) expect_equal(fprod(Inf, w = 1, na.rm = FALSE), Inf) expect_equal(fprod(-Inf, w = 1, na.rm = FALSE), -Inf) expect_equal(fprod(TRUE, w = 1, na.rm = FALSE), 1) expect_equal(fprod(FALSE, w = 1, na.rm = FALSE), 0) expect_equal(fprod(NA, w = NA), NA_real_) expect_equal(fprod(NaN, w = NA), NA_real_) expect_equal(fprod(Inf, w = NA), NA_real_) expect_equal(fprod(-Inf, w = NA), NA_real_) expect_equal(fprod(TRUE, w = NA), NA_real_) expect_equal(fprod(FALSE, w = NA), NA_real_) expect_equal(fprod(NA, w = NA, na.rm = FALSE), NA_real_) expect_equal(fprod(NaN, w = NA, na.rm = FALSE), NA_real_) expect_equal(fprod(Inf, w = NA, na.rm = FALSE), NA_real_) expect_equal(fprod(-Inf, w = NA, na.rm = FALSE), NA_real_) expect_equal(fprod(TRUE, w = NA, na.rm = FALSE), NA_real_) expect_equal(fprod(FALSE, w = NA, na.rm = FALSE), NA_real_) expect_equal(fprod(1:3, w = c(1,Inf,3)), Inf) expect_equal(fprod(1:3, w = c(1,-Inf,3)), -Inf) expect_equal(fprod(1:3, w = c(1,Inf,3), na.rm = FALSE), Inf) expect_equal(fprod(1:3, w = c(1,-Inf,3), na.rm = FALSE), -Inf) }) test_that("fprod produces errors for wrong input", { expect_error(fprod("a")) expect_error(fprod(NA_character_)) expect_error(fprod(mNAc)) expect_error(fprod(mNAc, f)) expect_error(fprod(1:2,1:3)) expect_error(fprod(m,1:31)) expect_error(fprod(mtcars,1:31)) expect_error(fprod(mtcars, w = 1:31)) expect_error(fprod("a", w = 1)) expect_error(fprod(1:2, w = 1:3)) expect_error(fprod(NA_character_, w = 1)) expect_error(fprod(mNAc, w = wdat)) expect_error(fprod(mNAc, f, wdat)) expect_error(fprod(mNA, w = 1:33)) expect_error(fprod(1:2,1:2, 1:3)) expect_error(fprod(m,1:32,1:20)) expect_error(fprod(mtcars,1:32,1:10)) expect_error(fprod(1:2, w = c("a","b"))) expect_error(fprod(wlddev)) expect_error(fprod(wlddev, w = wlddev$year)) expect_error(fprod(wlddev, wlddev$iso3c)) expect_error(fprod(wlddev, wlddev$iso3c, wlddev$year)) }) collapse/tests/testthat/test-recode-replace.R0000644000176200001440000001266314057472125021047 0ustar liggesuserscontext("recode, replace") gmtc <- fgroup_by(mtcars, cyl) test_that("replace_NA and replace_Inf work well", { expect_equal(replace_NA(airquality, 0), `[<-`(airquality, is.na(airquality), value = 0)) expect_equal(replace_NA(airquality, 0, cols = 1:2), `[<-`(airquality, is.na(airquality), value = 0)) expect_equal(replace_NA(airquality, 0, cols = is.numeric), `[<-`(airquality, is.na(airquality), value = 0)) expect_equal(replace_NA(flag(EuStockMarkets), 0), `[<-`(flag(EuStockMarkets), is.na(flag(EuStockMarkets)), value = 0)) expect_equal(replace_Inf(dapply(mtcars, log)), `[<-`(dapply(mtcars, log), sapply(dapply(mtcars, log), is.infinite), value = NA)) expect_equal(replace_Inf(log(EuStockMarkets)), `[<-`(log(EuStockMarkets), is.infinite(log(EuStockMarkets)), value = NA)) expect_equal(replace_Inf(dapply(mtcars, log), replace.nan = TRUE), `[<-`(dapply(mtcars, log), sapply(dapply(mtcars, log), is.infinite), value = NA)) expect_equal(replace_Inf(log(EuStockMarkets), replace.nan = TRUE), `[<-`(log(EuStockMarkets), is.infinite(log(EuStockMarkets)), value = NA)) }) test_that("replace_outliers works well.", { expect_equal(replace_outliers(mtcars, 2), replace(mtcars, fscale(mtcars) > 2, NA)) expect_equal(replace_outliers(gmtc, 2, single.limit = "overall_SDs"), replace(gmtc, dapply(mtcars, fscale) > 2, NA)) expect_equal(replace_outliers(mtcars, 2, single.limit = "min"), replace(mtcars, mtcars < 2, NA)) expect_equal(replace_outliers(mtcars, 2, single.limit = "max"), replace(mtcars, mtcars > 2, NA)) expect_equal(replace_outliers(EuStockMarkets, 2), replace(EuStockMarkets, fscale(EuStockMarkets) > 2, NA)) expect_equal(replace_outliers(EuStockMarkets, 2, single.limit = "overall_SDs"), replace(EuStockMarkets, dapply(EuStockMarkets, fscale) > 2, NA)) expect_equal(replace_outliers(EuStockMarkets, 2, single.limit = "min"), replace(EuStockMarkets, EuStockMarkets < 2, NA)) expect_equal(replace_outliers(EuStockMarkets, 2, single.limit = "max"), replace(EuStockMarkets, EuStockMarkets > 2, NA)) }) set.seed(101) lmiss <- na_insert(letters) month.miss <- na_insert(month.name) char_dat <- na_insert(char_vars(GGDC10S)) options(warn = -1) test_that("recode_char works well", { expect_equal(recode_char(lmiss, a = "b"), replace(lmiss, lmiss == "a", "b")) expect_visible(recode_char(lmiss, a = "b", missing = "a")) # continue here to write proper tests!!.. expect_visible(recode_char(lmiss, a = "b", missing = "c")) expect_visible(recode_char(lmiss, a = "b", default = "n")) expect_visible(recode_char(lmiss, a = "b", default = "n", missing = "c")) expect_visible(recode_char(month.miss, ber = NA, regex = TRUE)) expect_visible(recode_char(month.miss, ber = NA, missing = "c", regex = TRUE)) expect_visible(recode_char(lmiss, ber = NA, default = "n", regex = TRUE)) expect_visible(recode_char(lmiss, ber = NA, default = "n", missing = "c", regex = TRUE)) expect_visible(recode_char(lmiss, a = "b", e = "f")) expect_visible(recode_char(lmiss, a = "b", e = "f", missing = "a")) expect_visible(recode_char(lmiss, a = "b", e = "f", missing = "c")) expect_visible(recode_char(lmiss, a = "b", e = "f", default = "n")) expect_visible(recode_char(lmiss, a = "b", e = "f", default = "n", missing = "c")) expect_visible(recode_char(month.miss, ber = NA, May = "a", regex = TRUE)) expect_visible(recode_char(month.miss, ber = NA, May = "a", missing = "c", regex = TRUE)) expect_visible(recode_char(lmiss, ber = NA, May = "a", default = "n", regex = TRUE)) expect_visible(recode_char(lmiss, ber = NA, May = "a", default = "n", missing = "c", regex = TRUE)) expect_visible(recode_char(char_dat, SGP = "SINGAPORE", VA = "Value Added")) expect_visible(recode_char(char_dat, SGP = "SINGAPORE", VA = "Value Added", missing = "c")) expect_visible(recode_char(char_dat, SGP = "SINGAPORE", VA = "Value Added", default = "n")) expect_visible(recode_char(char_dat, SGP = "SINGAPORE", VA = "Value Added", default = "n", missing = "c")) expect_visible(recode_char(char_dat, saharan = "SSA", regex = TRUE)) expect_visible(recode_char(char_dat, saharan = "SSA", regex = TRUE, missing = "c")) expect_visible(recode_char(char_dat, saharan = "SSA", regex = TRUE, default = "n")) expect_visible(recode_char(char_dat, saharan = "SSA", regex = TRUE, default = "n", missing = "c")) }) set.seed(101) vmiss <- na_insert(mtcars$cyl) mtcNA <- na_insert(mtcars) test_that("recode_num works well", { expect_equal(recode_num(vmiss, `4` = 5), replace(vmiss, vmiss == 4, 5)) expect_visible(recode_num(vmiss, `4` = 5, missing = 4)) # continue here to write proper tests!!.. expect_visible(recode_num(vmiss, `4` = 5, missing = 7)) expect_visible(recode_num(vmiss, `4` = 5, default = 8)) expect_visible(recode_num(vmiss, `4` = 5, default = 8, missing = 7)) expect_visible(recode_num(vmiss, `4` = 5, `6` = 10)) expect_visible(recode_num(vmiss, `4` = 5, `6` = 10, missing = 6)) expect_visible(recode_num(vmiss, `4` = 5, `6` = 10, missing = 7)) expect_visible(recode_num(vmiss, `4` = 5, `6` = 10, default = 8)) expect_visible(recode_num(vmiss, `4` = 5, `6` = 10, default = 8, missing = 7)) expect_visible(recode_num(mtcNA, `4` = 5, `1` = 2)) expect_visible(recode_num(mtcNA, `4` = 5, `1` = 2, missing = 6)) expect_visible(recode_num(mtcNA, `4` = 5, `1` = 2, default = 8)) expect_visible(recode_num(mtcNA, `4` = 5, `1` = 2, default = 8, missing = 7)) }) options(warn = 1) collapse/tests/testthat/test-sf.R0000644000176200001440000000347414172533272016604 0ustar liggesuserscontext("collapse and sf") if(Sys.getenv("NMAC") == "TRUE") { library(sf) nc <- st_read(system.file("shape/nc.shp", package = "sf"), quiet = TRUE) test_that("sf methods work properly", { expect_visible(nc %>% fgroup_by(AREA)) expect_visible(nc %>% fgroup_by(AREA) %>% fgroup_vars) expect_visible(descr(nc)) expect_visible(qsu(nc)) expect_visible(varying(nc)) expect_true(any(names(num_vars(nc)) == "geometry")) expect_true(any(names(fselect(nc, AREA, NAME:FIPSNO)) == "geometry")) expect_true(any(names(gv(nc, c("AREA", "NAME", "FIPS", "FIPSNO"))) == "geometry")) expect_true(any(names(fsubset(nc, AREA > fmean(AREA), AREA, NAME:FIPSNO)) == "geometry")) expect_true(any(names(ss(nc, 1:10, c("AREA", "NAME", "FIPS", "FIPSNO"))) == "geometry")) expect_true(inherits(rsplit(nc, AREA ~ SID74)[[1L]], "sf")) expect_equal(names(`nv<-`(nc, NULL)), c("NAME", "FIPS", "geometry")) # nv(nc) <- NULL expect_equal(tfmv(nc, is.numeric, log), tfmv(nc, is.numeric, log, apply = FALSE)) expect_equal(length(nc %>% gby(NAME) %>% varying), length(nc) - 2L) expect_true(is.data.frame(nc %>% gby(NAME) %>% varying(any_group = FALSE))) expect_visible(funique(nc, cols = 1)) expect_true(length(fcompute(nc, log_AREA = log(AREA))) == 2L) expect_true(length(fcomputev(nc, "AREA", log)) == 2L) expect_true(length(fcomputev(nc, "AREA", log, keep = "PERIMETER")) == 3L) expect_true(length(fcomputev(nc, "AREA", fscale, apply = FALSE)) == 2L) expect_true(length(fcomputev(nc, "AREA", fscale, apply = FALSE, keep = "PERIMETER")) == 3L) expect_true(inherits(nc %>% fgroup_by(SID74) %>% fsummarise(AREA_Ag = fsum(AREA), Perimeter_Ag = fmedian(PERIMETER), geometry = st_union(geometry)), "sf")) }) } collapse/tests/testthat/test-fcumsum.R0000644000176200001440000006604214167346517017663 0ustar liggesuserscontext("fcumsum") # rm(liso = ls()) set.seed(101) x <- abs(1000*rnorm(100)) xNA <- x xNA[sample.int(100, 20)] <- NA xNA[1L] <- NA f <- as.factor(rep(1:10, each = 10)) t <- as.factor(rep(1:100)) data <- wlddev[wlddev$iso3c %in% c("BLZ","IND","USA","SRB","GRL"), ] settransform(data, ODA = NULL, POP = NULL) # Too large (integer overflow) g <- GRP(droplevels(data$iso3c)) td <- as.factor(data$year) dataNA <- na_insert(data) m <- as.matrix(data) suppressWarnings(storage.mode(m) <- "numeric") mNAc <- as.matrix(dataNA) mNA <- mNAc suppressWarnings(storage.mode(mNA) <- "numeric") # Creatung unordered data: o = order(rnorm(100)) xuo = x[o] xNAuo = xNA[o] fuo = f[o] tuo = t[o] t2uo = seq_len(100)[o] o = order(o) od = order(rnorm(length(td))) muo = m[od, ] mNAuo <- mNA[od, ] datauo = data[od, ] dataNAuo = dataNA[od, ] guo = as_factor_GRP(g)[od] tduo = td[od] t2duo = seq_along(od)[od] od = order(od) bcumsum <- base::cumsum basecumsum <- function(x, na.rm = TRUE, fill = FALSE) { ax <- attributes(x) if(!na.rm || !anyNA(x)) return(`attributes<-`(bcumsum(x), ax)) cc <- which(!is.na(x)) x[cc] <- bcumsum(x[cc]) if(!fill) return(x) if(is.na(x[1L])) x[1L] <- 0L data.table::nafill(x, type = "locf") } rnBY <- function(x, ...) { if(is.list(x) || is.array(x)) return(setRownames(BY(x, ...), if(is.list(x)) attr(x, "row.names") else dimnames(x)[[1L]])) BY(x, ...) } test_that("fcumsum performs like basecumsum", { # No groups, no ordering expect_equal(fcumsum(-10:10), basecumsum(-10:10)) expect_equal(fcumsum(-10:10, na.rm = FALSE), basecumsum(-10:10, na.rm = FALSE)) expect_equal(fcumsum(-10:10, fill = TRUE), basecumsum(-10:10, fill = TRUE)) expect_equal(fcumsum(x), basecumsum(x)) expect_equal(fcumsum(x, na.rm = FALSE), basecumsum(x, na.rm = FALSE)) expect_equal(fcumsum(x, fill = TRUE), basecumsum(x, fill = TRUE)) expect_equal(fcumsum(xNA), basecumsum(xNA)) expect_equal(fcumsum(xNA, na.rm = FALSE), basecumsum(xNA, na.rm = FALSE)) expect_equal(fcumsum(xNA, fill = TRUE), basecumsum(xNA, fill = TRUE)) expect_equal(fcumsum(m), dapply(m, basecumsum)) expect_equal(fcumsum(m, na.rm = FALSE), dapply(m, basecumsum, na.rm = FALSE)) expect_equal(fcumsum(m, fill = TRUE), dapply(m, basecumsum, fill = TRUE)) expect_equal(fcumsum(mNA), dapply(mNA, basecumsum)) expect_equal(fcumsum(mNA, na.rm = FALSE), dapply(mNA, basecumsum, na.rm = FALSE)) expect_equal(fcumsum(mNA, fill = TRUE), dapply(mNA, basecumsum, fill = TRUE)) expect_equal(fcumsum(num_vars(data)), dapply(num_vars(data), basecumsum)) expect_equal(fcumsum(num_vars(data), na.rm = FALSE), dapply(num_vars(data), basecumsum, na.rm = FALSE)) expect_equal(fcumsum(num_vars(data), fill = TRUE), dapply(num_vars(data), basecumsum, fill = TRUE)) expect_equal(fcumsum(num_vars(dataNA)), dapply(num_vars(dataNA), basecumsum)) expect_equal(fcumsum(num_vars(dataNA), na.rm = FALSE), dapply(num_vars(dataNA), basecumsum, na.rm = FALSE)) expect_equal(fcumsum(num_vars(dataNA), fill = TRUE), dapply(num_vars(dataNA), basecumsum, fill = TRUE)) # With groups, no ordering expect_equal(fcumsum(x, f), BY(x, f, basecumsum, use.g.names = FALSE)) expect_equal(fcumsum(x, na.rm = FALSE, f), BY(x, f, basecumsum, na.rm = FALSE, use.g.names = FALSE)) expect_equal(fcumsum(x, f, fill = TRUE), BY(x, f, basecumsum, fill = TRUE, use.g.names = FALSE)) expect_equal(fcumsum(xNA, f), BY(xNA, f, basecumsum, use.g.names = FALSE)) expect_equal(fcumsum(xNA, na.rm = FALSE, f), BY(xNA, f, basecumsum, na.rm = FALSE, use.g.names = FALSE)) expect_equal(fcumsum(xNA, f, fill = TRUE), BY(xNA, f, basecumsum, fill = TRUE, use.g.names = FALSE)) expect_equal(fcumsum(m, g), rnBY(m, g, basecumsum, use.g.names = FALSE)) expect_equal(fcumsum(m, na.rm = FALSE, g), rnBY(m, g, basecumsum, na.rm = FALSE, use.g.names = FALSE)) expect_equal(fcumsum(m, g, fill = TRUE), rnBY(m, g, basecumsum, fill = TRUE, use.g.names = FALSE)) expect_equal(fcumsum(mNA, g), rnBY(mNA, g, basecumsum, use.g.names = FALSE)) expect_equal(fcumsum(mNA, na.rm = FALSE, g), rnBY(mNA, g, basecumsum, na.rm = FALSE, use.g.names = FALSE)) expect_equal(fcumsum(mNA, g, fill = TRUE), rnBY(mNA, g, basecumsum, fill = TRUE, use.g.names = FALSE)) expect_equal(fcumsum(num_vars(data), g), rnBY(num_vars(data), g, basecumsum, use.g.names = FALSE)) expect_equal(fcumsum(num_vars(data), na.rm = FALSE, g), rnBY(num_vars(data), g, basecumsum, na.rm = FALSE, use.g.names = FALSE)) expect_equal(fcumsum(num_vars(data), g, fill = TRUE), rnBY(num_vars(data), g, basecumsum, fill = TRUE, use.g.names = FALSE)) expect_equal(fcumsum(num_vars(dataNA), g), rnBY(num_vars(dataNA), g, basecumsum, use.g.names = FALSE)) expect_equal(fcumsum(num_vars(dataNA), g, na.rm = FALSE), rnBY(num_vars(dataNA), g, basecumsum, na.rm = FALSE, use.g.names = FALSE)) expect_equal(fcumsum(num_vars(dataNA), g, fill = TRUE), rnBY(num_vars(dataNA), g, basecumsum, fill = TRUE, use.g.names = FALSE)) }) test_that("fcumsum correctly handles unordered time-series and panel-series computations", { # With ordering, no groups: 1 expect_equal(fcumsum(x, o = 1:100), fcumsum(x)) expect_equal(fcumsum(x, o = 1:100, na.rm = FALSE), fcumsum(x, na.rm = FALSE)) expect_equal(fcumsum(x, o = 1:100, fill = TRUE), fcumsum(x, fill = TRUE)) expect_equal(fcumsum(xNA, o = 1:100), fcumsum(xNA)) expect_equal(fcumsum(xNA, o = 1:100, na.rm = FALSE), fcumsum(xNA, na.rm = FALSE)) expect_equal(fcumsum(xNA, o = 1:100, fill = TRUE), fcumsum(xNA, fill = TRUE)) expect_equal(fcumsum(m, o = seq_row(m)), fcumsum(m)) expect_equal(fcumsum(m, o = seq_row(m), na.rm = FALSE), fcumsum(m, na.rm = FALSE)) expect_equal(fcumsum(m, o = seq_row(m), fill = TRUE), fcumsum(m, fill = TRUE)) expect_equal(fcumsum(mNA, o = seq_row(m)), fcumsum(mNA)) expect_equal(fcumsum(mNA, o = seq_row(m), na.rm = FALSE), fcumsum(mNA, na.rm = FALSE)) expect_equal(fcumsum(mNA, o = seq_row(m), fill = TRUE), fcumsum(mNA, fill = TRUE)) expect_equal(fcumsum(num_vars(data), o = seq_row(data)), fcumsum(num_vars(data))) expect_equal(fcumsum(num_vars(data), o = seq_row(data), na.rm = FALSE), fcumsum(num_vars(data), na.rm = FALSE)) expect_equal(fcumsum(num_vars(data), o = seq_row(data), fill = TRUE), fcumsum(num_vars(data), fill = TRUE)) expect_equal(fcumsum(num_vars(dataNA), o = seq_row(data)), fcumsum(num_vars(dataNA))) expect_equal(fcumsum(num_vars(dataNA), o = seq_row(data), na.rm = FALSE), fcumsum(num_vars(dataNA), na.rm = FALSE)) expect_equal(fcumsum(num_vars(dataNA), o = seq_row(data), fill = TRUE), fcumsum(num_vars(dataNA), fill = TRUE)) # With ordering, no groups: 2 expect_equal(fcumsum(xuo, o = t2uo)[o], fcumsum(x)) expect_equal(fcumsum(xuo, o = t2uo, na.rm = FALSE)[o], fcumsum(x, na.rm = FALSE)) expect_equal(fcumsum(xuo, o = t2uo, fill = TRUE)[o], fcumsum(x, fill = TRUE)) expect_equal(fcumsum(xNAuo, o = t2uo)[o], fcumsum(xNA)) expect_equal(fcumsum(xNAuo, o = t2uo, na.rm = FALSE)[o], fcumsum(xNA, na.rm = FALSE)) expect_equal(fcumsum(xNAuo, o = t2uo, fill = TRUE)[o], fcumsum(xNA, fill = TRUE)) expect_equal(fcumsum(muo, o = t2duo)[od, ], fcumsum(m)) expect_equal(fcumsum(muo, o = t2duo, na.rm = FALSE)[od, ], fcumsum(m, na.rm = FALSE)) expect_equal(fcumsum(muo, o = t2duo, fill = TRUE)[od, ], fcumsum(m, fill = TRUE)) expect_equal(fcumsum(mNAuo, o = t2duo)[od, ], fcumsum(mNA)) expect_equal(fcumsum(mNAuo, o = t2duo, na.rm = FALSE)[od, ], fcumsum(mNA, na.rm = FALSE)) expect_equal(fcumsum(mNAuo, o = t2duo, fill = TRUE)[od, ], fcumsum(mNA, fill = TRUE)) expect_equal(fcumsum(num_vars(datauo), o = t2duo)[od, ], fcumsum(num_vars(data))) expect_equal(fcumsum(num_vars(datauo), o = t2duo, na.rm = FALSE)[od, ], fcumsum(num_vars(data), na.rm = FALSE)) expect_equal(fcumsum(num_vars(datauo), o = t2duo, fill = TRUE)[od, ], fcumsum(num_vars(data), fill = TRUE)) expect_equal(fcumsum(num_vars(dataNAuo), o = t2duo)[od, ], fcumsum(num_vars(dataNA))) expect_equal(fcumsum(num_vars(dataNAuo), o = t2duo, na.rm = FALSE)[od, ], fcumsum(num_vars(dataNA), na.rm = FALSE)) expect_equal(fcumsum(num_vars(dataNAuo), o = t2duo, fill = TRUE)[od, ], fcumsum(num_vars(dataNA), fill = TRUE)) # With ordering and groups expect_equal(fcumsum(xuo, fuo, tuo)[o], fcumsum(x, f, t)) expect_equal(fcumsum(xuo, fuo, tuo, na.rm = FALSE)[o], fcumsum(x, f, t, na.rm = FALSE)) expect_equal(fcumsum(xuo, fuo, tuo, fill = TRUE)[o], fcumsum(x, f, t, fill = TRUE)) expect_equal(fcumsum(xNAuo, fuo, tuo)[o], fcumsum(xNA, f, t)) expect_equal(fcumsum(xNAuo, fuo, tuo, na.rm = FALSE)[o], fcumsum(xNA, f, t, na.rm = FALSE)) expect_equal(fcumsum(xNAuo, fuo, tuo, fill = TRUE)[o], fcumsum(xNA, f, t, fill = TRUE)) expect_equal(fcumsum(muo, guo, tduo)[od, ], fcumsum(m, g, td)) expect_equal(fcumsum(muo, guo, tduo, na.rm = FALSE)[od, ], fcumsum(m, g, td, na.rm = FALSE)) expect_equal(fcumsum(muo, guo, tduo, fill = TRUE)[od, ], fcumsum(m, g, td, fill = TRUE)) expect_equal(fcumsum(mNAuo, guo, tduo)[od, ], fcumsum(mNA, g, td)) expect_equal(fcumsum(mNAuo, guo, tduo, na.rm = FALSE)[od, ], fcumsum(mNA, g, td, na.rm = FALSE)) expect_equal(fcumsum(mNAuo, guo, tduo, fill = TRUE)[od, ], fcumsum(mNA, g, td, fill = TRUE)) expect_equal(fcumsum(num_vars(datauo), guo, tduo)[od, ], fcumsum(num_vars(data), g, td)) expect_equal(fcumsum(num_vars(datauo), guo, tduo, na.rm = FALSE)[od, ], fcumsum(num_vars(data), g, td, na.rm = FALSE)) expect_equal(fcumsum(num_vars(datauo), guo, tduo, fill = TRUE)[od, ], fcumsum(num_vars(data), g, td, fill = TRUE)) expect_equal(fcumsum(num_vars(dataNAuo), guo, tduo)[od, ], fcumsum(num_vars(dataNA), g, td)) expect_equal(fcumsum(num_vars(dataNAuo), guo, tduo, na.rm = FALSE)[od, ], fcumsum(num_vars(dataNA), g, td, na.rm = FALSE)) expect_equal(fcumsum(num_vars(dataNAuo), guo, tduo, fill = TRUE)[od, ], fcumsum(num_vars(dataNA), g, td, fill = TRUE)) }) test_that("fcumsum performs numerically stable in ordered computations", { expect_true(all_obj_equal(replicate(50, fcumsum(x), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fcumsum(xNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fcumsum(m), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fcumsum(mNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fcumsum(nv(data)), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fcumsum(nv(dataNA)), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fcumsum(x, f), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fcumsum(x, f, fill = TRUE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fcumsum(xNA, f), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fcumsum(xNA, f, fill = TRUE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fcumsum(m, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fcumsum(m, g, fill = TRUE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fcumsum(mNA, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fcumsum(mNA, g, fill = TRUE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fcumsum(nv(data), g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fcumsum(nv(data), g, fill = TRUE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fcumsum(nv(dataNA), g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fcumsum(nv(dataNA), g, fill = TRUE), simplify = FALSE))) }) test_that("fcumsum performs numerically stable in unordered computations", { expect_true(all_obj_equal(replicate(50, fcumsum(xuo, o = t2uo), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fcumsum(xNAuo, o = t2uo), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fcumsum(muo, o = t2duo), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fcumsum(nv(datauo), o = t2duo), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fcumsum(xuo, fuo, tuo), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fcumsum(xuo, fuo, tuo, fill = TRUE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fcumsum(muo, guo, tduo), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fcumsum(muo, guo, tduo, fill = TRUE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fcumsum(nv(datauo), guo, tduo), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fcumsum(nv(datauo), guo, tduo, fill = TRUE), simplify = FALSE))) }) # Testing integer methods test_that("Integer overflow gives error", { expect_error(fcumsum(1:1e5)) expect_error(fcumsum(-1:-1e5)) }) x <- as.integer(x) xNA <- as.integer(xNA) storage.mode(m) <- "integer" storage.mode(mNA) <- "integer" settransformv(data, is.numeric, as.integer) settransformv(dataNA, is.numeric, as.integer) xuo <- as.integer(xuo) xNAuo <- as.integer(xNAuo) storage.mode(muo) <- "integer" storage.mode(mNAuo) <- "integer" settransformv(datauo, is.numeric, as.integer) settransformv(dataNAuo, is.numeric, as.integer) test_that("fcumsum with integers performs like basecumsum", { # No groups, no ordering expect_identical(fcumsum(x), basecumsum(x)) expect_identical(fcumsum(x, na.rm = FALSE), basecumsum(x, na.rm = FALSE)) expect_identical(fcumsum(x, fill = TRUE), basecumsum(x, fill = TRUE)) expect_identical(fcumsum(xNA), basecumsum(xNA)) expect_identical(fcumsum(xNA, na.rm = FALSE), basecumsum(xNA, na.rm = FALSE)) expect_identical(fcumsum(xNA, fill = TRUE), basecumsum(xNA, fill = TRUE)) expect_identical(fcumsum(m), dapply(m, basecumsum)) expect_identical(fcumsum(m, na.rm = FALSE), dapply(m, basecumsum, na.rm = FALSE)) expect_identical(fcumsum(m, fill = TRUE), dapply(m, basecumsum, fill = TRUE)) expect_identical(fcumsum(mNA), dapply(mNA, basecumsum)) expect_identical(fcumsum(mNA, na.rm = FALSE), dapply(mNA, basecumsum, na.rm = FALSE)) expect_identical(fcumsum(mNA, fill = TRUE), dapply(mNA, basecumsum, fill = TRUE)) expect_identical(fcumsum(num_vars(data)), dapply(num_vars(data), basecumsum)) expect_identical(fcumsum(num_vars(data), na.rm = FALSE), dapply(num_vars(data), basecumsum, na.rm = FALSE)) expect_identical(fcumsum(num_vars(data), fill = TRUE), dapply(num_vars(data), basecumsum, fill = TRUE)) expect_identical(fcumsum(num_vars(dataNA)), dapply(num_vars(dataNA), basecumsum)) expect_identical(fcumsum(num_vars(dataNA), na.rm = FALSE), dapply(num_vars(dataNA), basecumsum, na.rm = FALSE)) expect_identical(fcumsum(num_vars(dataNA), fill = TRUE), dapply(num_vars(dataNA), basecumsum, fill = TRUE)) # With groups, no ordering expect_identical(fcumsum(x, f), BY(x, f, basecumsum, use.g.names = FALSE)) expect_identical(fcumsum(x, na.rm = FALSE, f), BY(x, f, basecumsum, na.rm = FALSE, use.g.names = FALSE)) expect_identical(fcumsum(x, f, fill = TRUE), BY(x, f, basecumsum, fill = TRUE, use.g.names = FALSE)) expect_identical(fcumsum(xNA, f), BY(xNA, f, basecumsum, use.g.names = FALSE)) expect_identical(fcumsum(xNA, na.rm = FALSE, f), BY(xNA, f, basecumsum, na.rm = FALSE, use.g.names = FALSE)) expect_identical(fcumsum(xNA, f, fill = TRUE), BY(xNA, f, basecumsum, fill = TRUE, use.g.names = FALSE)) expect_identical(fcumsum(m, g), rnBY(m, g, basecumsum, use.g.names = FALSE)) expect_identical(fcumsum(m, na.rm = FALSE, g), rnBY(m, g, basecumsum, na.rm = FALSE, use.g.names = FALSE)) expect_identical(fcumsum(m, g, fill = TRUE), rnBY(m, g, basecumsum, fill = TRUE, use.g.names = FALSE)) expect_identical(fcumsum(mNA, g), rnBY(mNA, g, basecumsum, use.g.names = FALSE)) expect_identical(fcumsum(mNA, na.rm = FALSE, g), rnBY(mNA, g, basecumsum, na.rm = FALSE, use.g.names = FALSE)) expect_identical(fcumsum(mNA, g, fill = TRUE), rnBY(mNA, g, basecumsum, fill = TRUE, use.g.names = FALSE)) expect_identical(fcumsum(num_vars(data), g), rnBY(num_vars(data), g, basecumsum, use.g.names = FALSE)) expect_identical(fcumsum(num_vars(data), na.rm = FALSE, g), rnBY(num_vars(data), g, basecumsum, na.rm = FALSE, use.g.names = FALSE)) expect_identical(fcumsum(num_vars(data), g, fill = TRUE), rnBY(num_vars(data), g, basecumsum, fill = TRUE, use.g.names = FALSE)) expect_identical(fcumsum(num_vars(dataNA), g), rnBY(num_vars(dataNA), g, basecumsum, use.g.names = FALSE)) expect_identical(fcumsum(num_vars(dataNA), g, na.rm = FALSE), rnBY(num_vars(dataNA), g, basecumsum, na.rm = FALSE, use.g.names = FALSE)) expect_identical(fcumsum(num_vars(dataNA), g, fill = TRUE), rnBY(num_vars(dataNA), g, basecumsum, fill = TRUE, use.g.names = FALSE)) }) test_that("fcumsum with integers correctly handles unordered time-series and panel-series computations", { # With ordering, no groups: 1 expect_identical(fcumsum(x, o = 1:100), fcumsum(x)) expect_identical(fcumsum(x, o = 1:100, na.rm = FALSE), fcumsum(x, na.rm = FALSE)) expect_identical(fcumsum(x, o = 1:100, fill = TRUE), fcumsum(x, fill = TRUE)) expect_identical(fcumsum(xNA, o = 1:100), fcumsum(xNA)) expect_identical(fcumsum(xNA, o = 1:100, na.rm = FALSE), fcumsum(xNA, na.rm = FALSE)) expect_identical(fcumsum(xNA, o = 1:100, fill = TRUE), fcumsum(xNA, fill = TRUE)) expect_identical(fcumsum(m, o = seq_row(m)), fcumsum(m)) expect_identical(fcumsum(m, o = seq_row(m), na.rm = FALSE), fcumsum(m, na.rm = FALSE)) expect_identical(fcumsum(m, o = seq_row(m), fill = TRUE), fcumsum(m, fill = TRUE)) expect_identical(fcumsum(mNA, o = seq_row(m)), fcumsum(mNA)) expect_identical(fcumsum(mNA, o = seq_row(m), na.rm = FALSE), fcumsum(mNA, na.rm = FALSE)) expect_identical(fcumsum(mNA, o = seq_row(m), fill = TRUE), fcumsum(mNA, fill = TRUE)) expect_identical(fcumsum(num_vars(data), o = seq_row(data)), fcumsum(num_vars(data))) expect_identical(fcumsum(num_vars(data), o = seq_row(data), na.rm = FALSE), fcumsum(num_vars(data), na.rm = FALSE)) expect_identical(fcumsum(num_vars(data), o = seq_row(data), fill = TRUE), fcumsum(num_vars(data), fill = TRUE)) expect_identical(fcumsum(num_vars(dataNA), o = seq_row(data)), fcumsum(num_vars(dataNA))) expect_identical(fcumsum(num_vars(dataNA), o = seq_row(data), na.rm = FALSE), fcumsum(num_vars(dataNA), na.rm = FALSE)) expect_identical(fcumsum(num_vars(dataNA), o = seq_row(data), fill = TRUE), fcumsum(num_vars(dataNA), fill = TRUE)) # With ordering, no groups: 2 expect_identical(fcumsum(xuo, o = t2uo)[o], fcumsum(x)) expect_identical(fcumsum(xuo, o = t2uo, na.rm = FALSE)[o], fcumsum(x, na.rm = FALSE)) expect_identical(fcumsum(xuo, o = t2uo, fill = TRUE)[o], fcumsum(x, fill = TRUE)) expect_identical(fcumsum(xNAuo, o = t2uo)[o], fcumsum(xNA)) expect_identical(fcumsum(xNAuo, o = t2uo, na.rm = FALSE)[o], fcumsum(xNA, na.rm = FALSE)) expect_identical(fcumsum(xNAuo, o = t2uo, fill = TRUE)[o], fcumsum(xNA, fill = TRUE)) expect_identical(fcumsum(muo, o = t2duo)[od, ], fcumsum(m)) expect_identical(fcumsum(muo, o = t2duo, na.rm = FALSE)[od, ], fcumsum(m, na.rm = FALSE)) expect_identical(fcumsum(muo, o = t2duo, fill = TRUE)[od, ], fcumsum(m, fill = TRUE)) expect_identical(fcumsum(mNAuo, o = t2duo)[od, ], fcumsum(mNA)) expect_identical(fcumsum(mNAuo, o = t2duo, na.rm = FALSE)[od, ], fcumsum(mNA, na.rm = FALSE)) expect_identical(fcumsum(mNAuo, o = t2duo, fill = TRUE)[od, ], fcumsum(mNA, fill = TRUE)) expect_identical(fcumsum(num_vars(datauo), o = t2duo)[od, ], fcumsum(num_vars(data))) expect_identical(fcumsum(num_vars(datauo), o = t2duo, na.rm = FALSE)[od, ], fcumsum(num_vars(data), na.rm = FALSE)) expect_identical(fcumsum(num_vars(datauo), o = t2duo, fill = TRUE)[od, ], fcumsum(num_vars(data), fill = TRUE)) expect_identical(fcumsum(num_vars(dataNAuo), o = t2duo)[od, ], fcumsum(num_vars(dataNA))) expect_identical(fcumsum(num_vars(dataNAuo), o = t2duo, na.rm = FALSE)[od, ], fcumsum(num_vars(dataNA), na.rm = FALSE)) expect_identical(fcumsum(num_vars(dataNAuo), o = t2duo, fill = TRUE)[od, ], fcumsum(num_vars(dataNA), fill = TRUE)) # With ordering and groups expect_identical(fcumsum(xuo, fuo, tuo)[o], fcumsum(x, f, t)) expect_identical(fcumsum(xuo, fuo, tuo, na.rm = FALSE)[o], fcumsum(x, f, t, na.rm = FALSE)) expect_identical(fcumsum(xuo, fuo, tuo, fill = TRUE)[o], fcumsum(x, f, t, fill = TRUE)) expect_identical(fcumsum(xNAuo, fuo, tuo)[o], fcumsum(xNA, f, t)) expect_identical(fcumsum(xNAuo, fuo, tuo, na.rm = FALSE)[o], fcumsum(xNA, f, t, na.rm = FALSE)) expect_identical(fcumsum(xNAuo, fuo, tuo, fill = TRUE)[o], fcumsum(xNA, f, t, fill = TRUE)) expect_identical(fcumsum(muo, guo, tduo)[od, ], fcumsum(m, g, td)) expect_identical(fcumsum(muo, guo, tduo, na.rm = FALSE)[od, ], fcumsum(m, g, td, na.rm = FALSE)) expect_identical(fcumsum(muo, guo, tduo, fill = TRUE)[od, ], fcumsum(m, g, td, fill = TRUE)) expect_identical(fcumsum(mNAuo, guo, tduo)[od, ], fcumsum(mNA, g, td)) expect_identical(fcumsum(mNAuo, guo, tduo, na.rm = FALSE)[od, ], fcumsum(mNA, g, td, na.rm = FALSE)) expect_identical(fcumsum(mNAuo, guo, tduo, fill = TRUE)[od, ], fcumsum(mNA, g, td, fill = TRUE)) expect_identical(fcumsum(num_vars(datauo), guo, tduo)[od, ], fcumsum(num_vars(data), g, td)) expect_identical(fcumsum(num_vars(datauo), guo, tduo, na.rm = FALSE)[od, ], fcumsum(num_vars(data), g, td, na.rm = FALSE)) expect_identical(fcumsum(num_vars(datauo), guo, tduo, fill = TRUE)[od, ], fcumsum(num_vars(data), g, td, fill = TRUE)) expect_identical(fcumsum(num_vars(dataNAuo), guo, tduo)[od, ], fcumsum(num_vars(dataNA), g, td)) expect_identical(fcumsum(num_vars(dataNAuo), guo, tduo, na.rm = FALSE)[od, ], fcumsum(num_vars(dataNA), g, td, na.rm = FALSE)) expect_identical(fcumsum(num_vars(dataNAuo), guo, tduo, fill = TRUE)[od, ], fcumsum(num_vars(dataNA), g, td, fill = TRUE)) }) test_that("fcumsum with integers performs numerically stable in ordered computations", { expect_true(all_identical(replicate(50, fcumsum(x), simplify = FALSE))) expect_true(all_identical(replicate(50, fcumsum(xNA), simplify = FALSE))) expect_true(all_identical(replicate(50, fcumsum(m), simplify = FALSE))) expect_true(all_identical(replicate(50, fcumsum(mNA), simplify = FALSE))) expect_true(all_identical(replicate(50, fcumsum(nv(data)), simplify = FALSE))) expect_true(all_identical(replicate(50, fcumsum(nv(dataNA)), simplify = FALSE))) expect_true(all_identical(replicate(50, fcumsum(x, f), simplify = FALSE))) expect_true(all_identical(replicate(50, fcumsum(x, f, fill = TRUE), simplify = FALSE))) expect_true(all_identical(replicate(50, fcumsum(xNA, f), simplify = FALSE))) expect_true(all_identical(replicate(50, fcumsum(xNA, f, fill = TRUE), simplify = FALSE))) expect_true(all_identical(replicate(50, fcumsum(m, g), simplify = FALSE))) expect_true(all_identical(replicate(50, fcumsum(m, g, fill = TRUE), simplify = FALSE))) expect_true(all_identical(replicate(50, fcumsum(mNA, g), simplify = FALSE))) expect_true(all_identical(replicate(50, fcumsum(mNA, g, fill = TRUE), simplify = FALSE))) expect_true(all_identical(replicate(50, fcumsum(nv(data), g), simplify = FALSE))) expect_true(all_identical(replicate(50, fcumsum(nv(data), g, fill = TRUE), simplify = FALSE))) expect_true(all_identical(replicate(50, fcumsum(nv(dataNA), g), simplify = FALSE))) expect_true(all_identical(replicate(50, fcumsum(nv(dataNA), g, fill = TRUE), simplify = FALSE))) }) test_that("fcumsum with integers performs numerically stable in unordered computations", { expect_true(all_identical(replicate(50, fcumsum(xuo, o = t2uo), simplify = FALSE))) expect_true(all_identical(replicate(50, fcumsum(xNAuo, o = t2uo), simplify = FALSE))) expect_true(all_identical(replicate(50, fcumsum(muo, o = t2duo), simplify = FALSE))) expect_true(all_identical(replicate(50, fcumsum(nv(datauo), o = t2duo), simplify = FALSE))) expect_true(all_identical(replicate(50, fcumsum(xuo, fuo, tuo), simplify = FALSE))) expect_true(all_identical(replicate(50, fcumsum(xuo, fuo, tuo, fill = TRUE), simplify = FALSE))) expect_true(all_identical(replicate(50, fcumsum(muo, guo, tduo), simplify = FALSE))) expect_true(all_identical(replicate(50, fcumsum(muo, guo, tduo, fill = TRUE), simplify = FALSE))) expect_true(all_identical(replicate(50, fcumsum(nv(datauo), guo, tduo), simplify = FALSE))) expect_true(all_identical(replicate(50, fcumsum(nv(datauo), guo, tduo, fill = TRUE), simplify = FALSE))) }) test_that("fcumsum handles special values in the right way", { expect_identical(fcumsum(c(NaN,NaN)), c(NaN,NaN)) expect_identical(fcumsum(c(Inf,Inf)), c(Inf,Inf)) expect_identical(fcumsum(c(Inf,-Inf)), c(Inf,NaN)) expect_identical(fcumsum(c(FALSE,TRUE)), c(0L,1L)) expect_identical(fcumsum(c(TRUE,FALSE)), c(1L,1L)) expect_identical(fcumsum(c(1,NA)), c(1,NA)) expect_identical(fcumsum(c(NA,1)), c(NA,1)) expect_identical(fcumsum(c(1L,NA)), c(1L,NA)) expect_identical(fcumsum(c(NA,1L)), c(NA,1L)) expect_identical(fcumsum(c(NaN,1)), c(NaN,1)) expect_identical(fcumsum(c(1,NaN)), c(1, NaN)) expect_identical(fcumsum(c(Inf,1)), c(Inf,Inf)) expect_identical(fcumsum(c(1,Inf)), c(1,Inf)) expect_identical(fcumsum(c(Inf,NA)), c(Inf,NA)) expect_identical(fcumsum(c(NA,Inf)), c(NA, Inf)) }) test_that("fcumsum produces errors for wrong input", { # type: normally guaranteed by C++ expect_error(fcumsum(mNAc)) expect_error(fcumsum(wlddev)) expect_error(fcumsum(mNAc, f)) expect_error(fcumsum(x, "1")) # The usual stuff: Wrongly sized grouping vectors or time-variables expect_error(fcumsum(1:3, o = 1:2)) expect_error(fcumsum(1:3, o = 1:4)) expect_error(fcumsum(1:3, g = 1:2)) expect_error(fcumsum(1:3, g = 1:4)) expect_error(fcumsum(1:4, g = c(1,1,2,2), o = c(1,2,1))) expect_error(fcumsum(1:4, g = c(1,2,2), o = c(1,2,1,2))) }) x <- as.integer(wlddev$year * 1000000L) set.seed(101) xNA <- na_insert(x) g <- wlddev$iso3c o <- seq_along(x) test_that("Integer overflow errors", { # Slightly exceeding INT_MIN and INT_MAX expect_error(fcumsum(c(-2147483646L, -2L))) expect_error(fcumsum(c(-2147483646L, -2L), na.rm = FALSE)) expect_error(fcumsum(c(-2147483646L, -2L), fill = TRUE)) expect_error(fcumsum(c(2147483646L, 2L))) expect_error(fcumsum(c(2147483646L, 2L), na.rm = FALSE)) expect_error(fcumsum(c(2147483646L, 2L), fill = TRUE)) # No groups expect_error(fcumsum(x)) expect_error(fcumsum(x, na.rm = FALSE)) expect_error(fcumsum(x, fill = TRUE)) expect_error(fcumsum(xNA)) expect_error(fcumsum(xNA, fill = TRUE)) # With groups expect_error(fcumsum(x, g)) expect_error(fcumsum(x, g, na.rm = FALSE)) expect_error(fcumsum(x, g, fill = TRUE)) expect_error(fcumsum(xNA, g)) expect_error(fcumsum(xNA, g, fill = TRUE)) # No groups: Ordered expect_error(fcumsum(x, o = o, check.o = FALSE)) expect_error(fcumsum(x, o = o, check.o = FALSE, na.rm = FALSE)) expect_error(fcumsum(x, o = o, check.o = FALSE, fill = TRUE)) expect_error(fcumsum(xNA, o = o, check.o = FALSE)) expect_error(fcumsum(xNA, o = o, check.o = FALSE, fill = TRUE)) # With groups: Ordered expect_error(fcumsum(x, g, o = o, check.o = FALSE)) expect_error(fcumsum(x, g, o = o, check.o = FALSE, na.rm = FALSE)) expect_error(fcumsum(x, g, o = o, check.o = FALSE, fill = TRUE)) expect_error(fcumsum(xNA, g, o = o, check.o = FALSE)) expect_error(fcumsum(xNA, g, o = o, check.o = FALSE, fill = TRUE)) }) collapse/tests/testthat/test-list-processing.R0000644000176200001440000000720614167362476021330 0ustar liggesuserscontext("list-processing") l <- lm(mpg ~cyl + vs + am, mtcars) # str(l, give.attr = FALSE) is.regular <- function(x) is.atomic(x) || is.list(x) test_that("atomic_elem and list_elem work well", { expect_equal(atomic_elem(l), unclass(l)[sapply(l, is.atomic)]) expect_equal(list_elem(l), unclass(l)[sapply(l, is.list)]) expect_equal(atomic_elem(l, keep.class = TRUE), `oldClass<-`(unclass(l)[sapply(l, is.atomic)], oldClass(l))) expect_equal(list_elem(l, keep.class = TRUE), `oldClass<-`(unclass(l)[sapply(l, is.list)], oldClass(l))) for(i in 1:6) expect_equal(atomic_elem(l, keep.class = TRUE, return = i), get_vars(l, is.atomic, return = i)) for(i in 1:6) expect_equal(list_elem(l, keep.class = TRUE, return = i), get_vars(l, is.list, return = i)) expect_identical(`atomic_elem<-`(l, atomic_elem(l)), l) expect_identical(`list_elem<-`(l, list_elem(l)), l) expect_error(`atomic_elem<-`(l, list_elem(l))) expect_error(`list_elem<-`(l, atomic_elem(l))) }) test_that("ldepth works well", { expect_identical(ldepth(list(mtcars), DF.as.list = FALSE), 1L) expect_identical(ldepth(list(mtcars), DF.as.list = TRUE), 2L) expect_identical(ldepth(list(mtcars, l), DF.as.list = FALSE), 3L) expect_identical(ldepth(list(mtcars, l), DF.as.list = TRUE), 3L) expect_identical(ldepth(list(list(list(mtcars)), l), DF.as.list = FALSE), 3L) expect_identical(ldepth(list(list(list(mtcars)), l), DF.as.list = TRUE), 4L) }) test_that("rapply2d works well", { l2 <- list(qM(mtcars), list(qM(mtcars), as.matrix(mtcars))) expect_equal(rapply2d(l2, fmean), rapply(l2, fmean, how = "list")) expect_equal(rapply2d(l[-length(l)], is.regular), rapply(l[-length(l)], is.regular, how = "list")) }) test_that("get_elem works well", { # Could still add more tests.. expect_true(is.matrix(get_elem(list(list(list(l))), is.matrix))) expect_false(is.matrix(get_elem(list(list(list(l))), is.matrix, keep.tree = TRUE))) l2 <- list(list(2,list("a",1)),list(1,list("b",2))) expect_identical(get_elem(l2, is.character), list("a", "b")) expect_identical(get_elem(l2, is.character, keep.tree = TRUE), list(list(list("a")),list(list("b")))) expect_identical(get_elem(l, "residuals"), resid(l)) expect_identical(get_elem(l, "fit", regex = TRUE), fitted(l)) expect_equal(get_elem(l, "tol"), 1e-7) expect_identical(get_elem(mtcars, 1), mtcars[[1]]) expect_identical(get_elem(mtcars, 1, DF.as.list = TRUE), as.list(ss(mtcars, 1))) }) test_that("reg_elem and irreg_elem work well", { expect_true(is_unlistable(reg_elem(l))) expect_false(is_unlistable(irreg_elem(l))) expect_true(is_unlistable(reg_elem(list(l), keep.tree = FALSE))) expect_true(is_unlistable(reg_elem(list(l), keep.tree = TRUE))) expect_false(is_unlistable(irreg_elem(list(l), keep.tree = FALSE))) expect_false(is_unlistable(irreg_elem(list(l), keep.tree = TRUE))) }) test_that("has_elem works well", { expect_true(has_elem(l, is.matrix)) expect_true(has_elem(l, is.data.frame)) expect_false(has_elem(l, is.data.frame, DF.as.list = TRUE)) expect_true(has_elem(l, is_categorical)) expect_false(has_elem(l, is_date)) expect_false(has_elem(l, is_qG)) expect_false(has_elem(l, "am", recursive = FALSE)) expect_false(has_elem(l, "pivot", recursive = FALSE)) expect_true(has_elem(l, "pivot")) expect_true(has_elem(l, "am", DF.as.list = TRUE)) expect_false(has_elem(l, "am")) expect_true(has_elem(l, "tol")) expect_false(has_elem(l, "mod")) expect_true(has_elem(l, "mod", regex = TRUE)) expect_true(has_elem(l, "vot", regex = TRUE)) expect_false(has_elem(l, "piv", regex = TRUE, recursive = FALSE)) }) collapse/tests/testthat/test-misc.R0000644000176200001440000002532414166300467017126 0ustar liggesuserscontext("Misc") # rm(list = ls()) set.seed(101) m <- na_insert(qM(mtcars)) test_that("descr, pwcor, pwcov, pwnobs", { expect_visible(descr(wlddev)) expect_visible(as.data.frame(descr(wlddev))) expect_output(print(descr(wlddev))) expect_visible(descr(GGDC10S)) expect_output(print(pwcor(nv(wlddev)))) expect_output(print(pwcor(nv(wlddev), N = TRUE))) expect_output(print(pwcor(nv(wlddev), P = TRUE))) expect_output(print(pwcor(nv(wlddev), N = TRUE, P = TRUE))) expect_output(print(pwcor(nv(wlddev), N = TRUE, P = TRUE, use = "complete.obs"))) expect_visible(pwcor(nv(GGDC10S))) expect_visible(pwcov(nv(wlddev))) expect_output(print(pwcov(nv(wlddev)))) expect_output(print(pwcov(nv(wlddev), N = TRUE))) expect_output(print(pwcov(nv(wlddev), P = TRUE))) expect_output(print(pwcov(nv(wlddev), N = TRUE, P = TRUE))) expect_output(print(pwcov(nv(wlddev), N = TRUE, P = TRUE, use = "complete.obs"))) expect_visible(pwnobs(wlddev)) expect_visible(pwnobs(GGDC10S)) expect_visible(descr(m)) expect_visible(pwcor(m)) expect_visible(pwcov(m)) expect_visible(pwnobs(m)) }) if(identical(Sys.getenv("NCRAN"), "TRUE")) { test_that("weighted correlations are correct", { # This is to fool very silly checks on CRAN scanning the code of the tests wtd.cors <- eval(parse(text = paste0("weights", ":", ":", "wtd.cors"))) wtd.cor <- eval(parse(text = paste0("weights", ":", ":", "wtd.cor"))) w <- abs(rnorm(fnrow(wlddev))) cc <- which(!missing_cases(nv(wlddev))) expect_equal(unclass(pwcor(nv(wlddev), w = w)), wtd.cors(nv(wlddev), w = w)) expect_equal(unclass(pwcor(nv(wlddev), w = w)), cov2cor(unclass(pwcov(nv(wlddev), w = w)))) expect_true(all_obj_equal(unclass(pwcor(ss(nv(wlddev), cc), w = w[cc])), cov2cor(unclass(pwcov(ss(nv(wlddev), cc), w = w[cc]))), unclass(pwcor(nv(wlddev), w = w, use = "complete.obs")), wtd.cors(ss(nv(wlddev), cc), w = w[cc]), cov.wt(ss(nv(wlddev), cc), w[cc], cor = TRUE)$cor)) suppressWarnings( expect_true(all_obj_equal(replace_NA(pwcor(ss(nv(wlddev), cc), w = w[cc], P = TRUE, array = FALSE)$P, 0), replace_NA(pwcov(ss(nv(wlddev), cc), w = w[cc], P = TRUE, array = FALSE)$P, 0), replace_NA(pwcor(ss(nv(wlddev), cc), w = w[cc], P = TRUE, array = FALSE, use = "complete.obs")$P, 0), replace_NA(pwcov(ss(nv(wlddev), cc), w = w[cc], P = TRUE, array = FALSE, use = "complete.obs")$P, 0), wtd.cor(ss(nv(wlddev), cc), w = w[cc])$p.value))) expect_true(all_obj_equal(unclass(pwcov(ss(nv(wlddev), cc), w = w[cc])), unclass(pwcov(nv(wlddev), w = w, use = "complete.obs")))) expect_equal(cov.wt(ss(nv(wlddev), cc), w[cc])$cov, unclass(pwcov(nv(wlddev), w = w, use = "complete.obs")), tolerance = 1e-3) }) test_that("na_rm works well", { set.seed(101) expect_equal(sapply(na_insert(wlddev), function(x) vtypes(na_rm(x))), vtypes(wlddev)) expect_equal(sapply(na_insert(wlddev), function(x) vlabels(na_rm(x))), vlabels(wlddev)) expect_equal(sapply(na_insert(wlddev), function(x) vclasses(na_rm(x))), vclasses(wlddev)) wldNA <- na_insert(wlddev) expect_equal(lengths(lapply(wldNA, na_rm)), fnobs(wldNA)) expect_equal(lapply(wldNA, na_rm), lapply(wldNA, function(x) copyMostAttrib(x[!is.na(x)], x))) rm(wldNA) expect_equal(na_rm(list(list(), 1,2,3)), list(1,2,3)) expect_equal(na_rm(list(1,2,NULL,3)), list(1,2,3)) }) } test_that("vlabels works well", { expect_equal(wlddev, setLabels(wlddev, vlabels(wlddev))) }) test_that("adding and removing stubs works", { expect_identical(rm_stub(add_stub(iris, "df"), "df"), iris) expect_identical(rm_stub(add_stub(iris, "df", pre = FALSE), "df", pre = FALSE), iris) expect_identical(rm_stub(add_stub(iris, "df", pre = FALSE), "df", regex = TRUE), iris) expect_identical(rm_stub(names(iris), "Sepal")[1], ".Length") expect_identical(rm_stub(names(iris), "Width", pre = FALSE)[4], "Petal.") expect_identical(rm_stub(names(iris), "Width", regex = TRUE)[4], "Petal.") }) test_that("deep matrix dispatch works well", { tsm <- EuStockMarkets class(tsm) <- setdiff(class(tsm), "matrix") set.seed(101) f <- qF(sample.int(5, nrow(tsm), TRUE)) NCOL2 <- function(x) if(length(d <- dim(x)) > 1L) d[2L] else length(x) for(i in setdiff(c(.FAST_FUN, .OPERATOR_FUN), c("fnth","flag","L","F", "fdiff","D","Dlog", "fgrowth","G"))) expect_equal(NCOL2(match.fun(i)(tsm, f)), 4L) expect_equal(NCOL2(fnth(tsm, 0.5, f)), 4L) expect_equal(NCOL2(BY(tsm, f, sum)), 4L) expect_equal(nrow(qsu(tsm)), 4L) for(i in c("flag", "L", "fdiff", "D", "Dlog", "fgrowth", "G")) expect_true(all(is.na(match.fun(i)(tsm)[1L, ]))) }) m <- qM(mtcars) v <- mtcars$mpg f <- qF(mtcars$cyl) fcc <- qF(mtcars$cyl, na.exclude = FALSE) g <- GRP(mtcars, ~ cyl) gl <- mtcars["cyl"] gmtc <- fgroup_by(mtcars, cyl) test_that("fast functions give same result using different grouping mechanisms", { for(i in .FAST_STAT_FUN) { # print(i) FUN <- match.fun(i) expect_true(all_obj_equal(FUN(v, g = mtcars$cyl), FUN(v, g = f), FUN(v, g = fcc), FUN(v, g = g), FUN(v, g = gl))) expect_true(all_obj_equal(FUN(v, g = mtcars$cyl, use.g.names = FALSE), FUN(v, g = f, use.g.names = FALSE), FUN(v, g = fcc, use.g.names = FALSE), FUN(v, g = g, use.g.names = FALSE), FUN(v, g = gl, use.g.names = FALSE))) expect_true(all_obj_equal(FUN(m, g = mtcars$cyl), FUN(m, g = f), FUN(m, g = fcc), FUN(m, g = g), FUN(m, g = gl))) expect_true(all_obj_equal(FUN(m, g = mtcars$cyl, use.g.names = FALSE), FUN(m, g = f, use.g.names = FALSE), FUN(m, g = fcc, use.g.names = FALSE), FUN(m, g = g, use.g.names = FALSE), FUN(m, g = gl, use.g.names = FALSE))) expect_true(all_obj_equal(FUN(mtcars, g = mtcars$cyl), FUN(mtcars, g = f), FUN(mtcars, g = fcc), FUN(mtcars, g = g), FUN(mtcars, g = gl))) if(Sys.getenv("NCRAN") == "TRUE") expect_true(all_obj_equal(FUN(mtcars, g = mtcars$cyl, use.g.names = FALSE), FUN(mtcars, g = f, use.g.names = FALSE), FUN(mtcars, g = fcc, use.g.names = FALSE), FUN(mtcars, g = g, use.g.names = FALSE), FUN(mtcars, g = gl, use.g.names = FALSE))) if(Sys.getenv("NCRAN") == "TRUE") expect_true(all_obj_equal(gv(FUN(mtcars, g = mtcars$cyl, use.g.names = FALSE), -2), gv(FUN(gmtc), -1), gv(FUN(gv(gmtc,-2)), -1), FUN(gv(gmtc,-2), keep.group_vars = FALSE), FUN(gmtc, keep.group_vars = FALSE))) expect_equal(FUN(v, TRA = 2L), TRA(v, FUN(v), 2L)) expect_true(all_obj_equal(FUN(v, g = mtcars$cyl, TRA = 1L), TRA(v, FUN(v, g = mtcars$cyl), 1L, mtcars$cyl), FUN(v, g = f, TRA = 1L), TRA(v, FUN(v, g = f), 1L, f), FUN(v, g = fcc, TRA = 1L), TRA(v, FUN(v, g = fcc), 1L, fcc), FUN(v, g = g, TRA = 1L), TRA(v, FUN(v, g = g), 1L, g), FUN(v, g = gl, TRA = 1L), TRA(v, FUN(v, g = gl), 1L, gl))) expect_equal(FUN(m, TRA = 2L), TRA(m, FUN(m), 2L)) expect_true(all_obj_equal(FUN(m, g = mtcars$cyl, TRA = 1L), TRA(m, FUN(m, g = mtcars$cyl), 1L, mtcars$cyl), FUN(m, g = f, TRA = 1L), TRA(m, FUN(m, g = f), 1L, f), FUN(m, g = fcc, TRA = 1L), TRA(m, FUN(m, g = fcc), 1L, fcc), FUN(m, g = g, TRA = 1L), TRA(m, FUN(m, g = g), 1L, g), FUN(m, g = gl, TRA = 1L), TRA(m, FUN(m, g = gl), 1L, gl))) expect_equal(FUN(mtcars, TRA = 2L), TRA(mtcars, FUN(mtcars), 2L)) expect_true(all_obj_equal(FUN(mtcars, g = mtcars$cyl, TRA = 1L), TRA(mtcars, FUN(mtcars, g = mtcars$cyl), 1L, mtcars$cyl), FUN(mtcars, g = f, TRA = 1L), TRA(mtcars, FUN(mtcars, g = f), 1L, f), FUN(mtcars, g = fcc, TRA = 1L), TRA(mtcars, FUN(mtcars, g = fcc), 1L, fcc), FUN(mtcars, g = g, TRA = 1L), TRA(mtcars, FUN(mtcars, g = g), 1L, g), FUN(mtcars, g = gl, TRA = 1L), TRA(mtcars, FUN(mtcars, g = gl), 1L, gl))) expect_equal(colorder(FUN(gmtc, TRA = 1L), mpg, cyl), TRA(gmtc, FUN(gmtc), 1L)) expect_equal(FUN(fselect(gmtc, -cyl), TRA = 1L), TRA(fselect(gmtc, -cyl), FUN(gmtc, keep.group_vars = FALSE), 1L)) } for(i in setdiff(.FAST_FUN, c(.FAST_STAT_FUN, "fhdbetween", "fhdwithin"))) { FUN <- match.fun(i) expect_true(all_obj_equal(FUN(v, g = mtcars$cyl), FUN(v, g = f), FUN(v, g = fcc), FUN(v, g = g), FUN(v, g = gl))) expect_true(all_obj_equal(FUN(m, g = mtcars$cyl), FUN(m, g = f), FUN(m, g = fcc), FUN(m, g = g), FUN(m, g = gl))) expect_true(all_obj_equal(FUN(mtcars, g = mtcars$cyl), FUN(mtcars, g = f), FUN(mtcars, g = fcc), FUN(mtcars, g = g), FUN(mtcars, g = gl))) } for(i in c("STD", "B", "W", "L", "D", "Dlog", "G")) { FUN <- match.fun(i) expect_true(all_obj_equal(FUN(v, g = mtcars$cyl), FUN(v, g = f), FUN(v, g = fcc), FUN(v, g = g), FUN(v, g = gl))) expect_true(all_obj_equal(FUN(m, g = mtcars$cyl), FUN(m, g = f), FUN(m, g = fcc), FUN(m, g = g), FUN(m, g = gl))) expect_true(all_obj_equal(FUN(mtcars, by = mtcars$cyl), FUN(mtcars, by = f), FUN(mtcars, by = fcc), FUN(mtcars, by = g), FUN(mtcars, by = gl))) } }) l <- as.list(mtcars) test_that("list and df methods give same results", { for (i in setdiff(c(.FAST_FUN, .OPERATOR_FUN), c("fhdbetween", "fhdwithin", "HDB", "HDW"))) { FUN <- match.fun(i) expect_equal(unattrib(FUN(mtcars)), unattrib(FUN(l))) } }) w <- mtcars$wt wFUNs <- c("fmean","fmedian","fsum","fprod","fmode","fvar","fsd","fscale","STD","fbetween","B","fwithin","W") test_that("fast functions give appropriate warnings", { for (i in setdiff(c(.FAST_FUN, .OPERATOR_FUN, "qsu"), c("fhdbetween", "fhdwithin", "HDB", "HDW"))) { FUN <- match.fun(i) expect_warning(FUN(v, bla = 1)) expect_warning(FUN(m, bla = 1)) expect_warning(FUN(mtcars, bla = 1)) expect_warning(FUN(gmtc, bla = 1)) if(i %in% wFUNs) { expect_warning(FUN(gmtc, bla = 1)) expect_error(FUN(gmtc, cyl)) # weight same as grouping variable if(i %in% .FAST_STAT_FUN) expect_true(names(FUN(gmtc, wt))[2L] == if(i == "fprod") "prod.wt" else "sum.wt") # weight same as grouping variable } } }) test_that("fselect and fsubset cannot easily be confuesed", { expect_warning(fsubset(mtcars, mpg:vs, wt)) expect_error(fselect(mtcars, mpg == 1)) }) collapse/tests/testthat.R0000644000176200001440000000016513767205743015221 0ustar liggesusers# rm(list = ls()) # Sys.setenv("R_TESTS" = "") library(testthat) # library(collapse) test_check("collapse") collapse/src/0000755000176200001440000000000014201327662012646 5ustar liggesuserscollapse/src/kit_dup.c0000644000176200001440000005130614175334642014464 0ustar liggesusers/* This code is adapted from the kit package: https://github.com/2005m/kit and licensed under a GPL-3.0 license. */ #include "kit.h" // TODO: Check if division hash is not faster, or use Rcpp IndexHash // TODO: Option to Preserva NA's ? // **************************************** // This function groups a single vector // **************************************** SEXP dupVecIndex(SEXP x) { const int n = length(x); int K, tx = TYPEOF(x); size_t M; if(n >= INT_MAX) error("Length of 'x' is too large. (Long vector not supported yet)"); // 1073741824 if (tx == STRSXP || tx == REALSXP || tx == CPLXSXP ) { const size_t n2 = 2U * (size_t) n; M = 256; K = 8; while (M < n2) { M *= 2; K++; } } else if(tx == INTSXP) { if(isFactor(x)) { tx = 1000; M = (size_t)nlevels(x) + 2; } else M = (size_t)n; } else if (tx == LGLSXP) { M = 3; } else error("Type %s is not supported.", type2char(tx)); // # nocov int *h = (int*)Calloc(M, int); // Table to save the hash values, table has size M // memset(h, 0, M * sizeof(int)); // not needed?? SEXP ans_i = PROTECT(allocVector(INTSXP, n)); int *pans_i = INTEGER(ans_i); size_t id = 0, g = 0; switch (tx) { case LGLSXP: case 1000: // This is for factors or logical vectors where the size of the table is known { const int *px = INTEGER(x); for (int i = 0, j, k = (int)M-1; i != n; ++i) { j = (px[i] == NA_INTEGER) ? k : px[i]; if(h[j]) pans_i[i] = h[j]; else pans_i[i] = h[j] = ++g; } } break; case INTSXP: { // Faster version based on division hash... const int *px = INTEGER(x); unsigned int iid = 0, nu = (unsigned)n; for (int i = 0; i != n; ++i) { iid = (unsigned)px[i]; if(iid >= nu) iid %= nu; // iid = (xi < nu) ? xi : xi % nu; // HASH(px[i], K); // get the hash value of x[i] while(h[iid]) { // Check if this hash value has been seen before if(px[h[iid]-1] == px[i]) { // Get the element of x that produced his value. if x[i] is the same, assign it the same index. pans_i[i] = pans_i[h[iid]-1]; // h[id]; goto ibl; } // else, we move forward to the next slot, until we find an empty one... We need to keep checking against the values, // because if we found the same value before, we would also have put it in another slot after the initial one with the same hash value. if(++iid >= nu) iid %= nu; // # nocov } // We put the index into the empty slot. h[iid] = i + 1; // need + 1 because for zero the while loop gives false.. pans_i[i] = ++g; // h[id]; ibl:; } } break; case REALSXP: { const double *px = REAL(x); union uno tpv; for (int i = 0; i != n; ++i) { tpv.d = px[i]; // R_IsNA(px[i]) ? NA_REAL : (R_IsNaN(px[i]) ? R_NaN : px[i]); id = HASH(tpv.u[0] + tpv.u[1], K); while(h[id]) { if(REQUAL(px[h[id]-1], px[i])) { pans_i[i] = pans_i[h[id]-1]; // h[id]; goto rbl; } if(++id >= M) id %= M; } h[id] = i + 1; pans_i[i] = ++g; // h[id]; rbl:; } } break; case CPLXSXP: { const Rcomplex *px = COMPLEX(x); unsigned int u; union uno tpv; Rcomplex tmp; for (int i = 0; i != n; ++i) { tmp.r = (px[i].r == 0.0) ? 0.0 : px[i].r; tmp.i = (px[i].i == 0.0) ? 0.0 : px[i].i; if (C_IsNA(tmp)) { tmp.r = tmp.i = NA_REAL; } else if (C_IsNaN(tmp)) { tmp.r = tmp.i = R_NaN; } tpv.d = tmp.r; u = tpv.u[0] ^ tpv.u[1]; tpv.d = tmp.i; u ^= tpv.u[0] ^ tpv.u[1]; id = HASH(u, K); while(h[id]) { if(CEQUAL(px[h[id]-1], px[i])) { pans_i[i] = pans_i[h[id]-1]; // h[id]; goto cbl; } if(++id >= M) id %= M; // # nocov } h[id] = i + 1; pans_i[i] = ++g; // h[id]; cbl:; } } break; case STRSXP: { const SEXP *px = STRING_PTR(x); for (int i = 0; i != n; ++i) { id = HASH(((intptr_t) px[i] & 0xffffffff), K); while(h[id]) { if(px[h[id]-1] == px[i]) { pans_i[i] = pans_i[h[id]-1]; // h[id]; goto sbl; } if(++id >= M) id %= M; // # nocov } h[id] = i + 1; pans_i[i] = ++g; // h[id]; sbl:; } } break; } Free(h); setAttrib(ans_i, install("N.groups"), ScalarInteger(g)); UNPROTECT(1); return ans_i; } SEXP dupVecIndexKeepNA(SEXP x) { const int n = length(x); int K, tx = TYPEOF(x); size_t M; if(n >= INT_MAX) error("Length of 'x' is too large. (Long vector not supported yet)"); // 1073741824 if (tx == STRSXP || tx == REALSXP || tx == CPLXSXP ) { const size_t n2 = 2U * (size_t) n; M = 256; K = 8; while (M < n2) { M *= 2; K++; } } else if(tx == INTSXP) { if(isFactor(x)) { tx = 1000; M = (size_t)nlevels(x) + 2; } else M = (size_t)n; } else if (tx == LGLSXP) { M = 3; } else error("Type %s is not supported.", type2char(tx)); // # nocov int *h = (int*)Calloc(M, int); // Table to save the hash values, table has size M // memset(h, 0, M * sizeof(int)); // not needed?? SEXP ans_i = PROTECT(allocVector(INTSXP, n)); int *pans_i = INTEGER(ans_i); size_t id = 0, g = 0; switch (tx) { case LGLSXP: case 1000: // This is for factors or logical vectors where the size of the table is known { const int *px = INTEGER(x); for (int i = 0, j; i != n; ++i) { if(px[i] == NA_INTEGER) { pans_i[i] = NA_INTEGER; continue; } j = px[i]; if(h[j]) pans_i[i] = h[j]; else pans_i[i] = h[j] = ++g; } } break; case INTSXP: { // Faster version based on division hash... const int *px = INTEGER(x); unsigned int iid = 0, nu = (unsigned)n; for (int i = 0; i != n; ++i) { if(px[i] == NA_INTEGER) { pans_i[i] = NA_INTEGER; continue; } iid = (unsigned)px[i]; if(iid >= nu) iid %= nu; // iid = (px[i] < n) ? px[i] : px[i] % n; // HASH(px[i], K); // get the hash value of x[i] while(h[iid]) { // Check if this hash value has been seen before if(px[h[iid]-1] == px[i]) { // Get the element of x that produced his value. if x[i] is the same, assign it the same index. pans_i[i] = pans_i[h[iid]-1]; // h[id]; goto ibl; } // else, we move forward to the next slot, until we find an empty one... We need to keep checking against the values, // because if we found the same value before, we would also have put it in another slot after the initial one with the same hash value. if(++id >= nu) id %= nu; // ++iid; iid %= nu; // # nocov } // We put the index into the empty slot. h[iid] = i + 1; // need + 1 because for zero the while loop gives false.. pans_i[i] = ++g; // h[id]; ibl:; } } break; case REALSXP: { const double *px = REAL(x); union uno tpv; for (int i = 0; i != n; ++i) { if(ISNAN(px[i])) { pans_i[i] = NA_INTEGER; continue; } tpv.d = px[i]; id = HASH(tpv.u[0] + tpv.u[1], K); while(h[id]) { if(REQUAL(px[h[id]-1], px[i])) { pans_i[i] = pans_i[h[id]-1]; // h[id]; goto rbl; } if(++id >= M) id %= M; // ++id; id %= M; } h[id] = i + 1; pans_i[i] = ++g; // h[id]; rbl:; } } break; case CPLXSXP: { const Rcomplex *px = COMPLEX(x); unsigned int u; union uno tpv; Rcomplex tmp; for (int i = 0; i != n; ++i) { tmp.r = (px[i].r == 0.0) ? 0.0 : px[i].r; tmp.i = (px[i].i == 0.0) ? 0.0 : px[i].i; if(C_IsNA(tmp) || C_IsNaN(tmp)) { pans_i[i] = NA_INTEGER; continue; } tpv.d = tmp.r; u = tpv.u[0] ^ tpv.u[1]; tpv.d = tmp.i; u ^= tpv.u[0] ^ tpv.u[1]; id = HASH(u, K); while(h[id]) { if(CEQUAL(px[h[id]-1], px[i])) { pans_i[i] = pans_i[h[id]-1]; // h[id]; goto cbl; } if(++id >= M) id %= M; //++id; id %= M; // # nocov } h[id] = i + 1; pans_i[i] = ++g; // h[id]; cbl:; } } break; case STRSXP: { const SEXP *px = STRING_PTR(x); for (int i = 0; i != n; ++i) { if(px[i] == NA_STRING) { pans_i[i] = NA_INTEGER; continue; } id = HASH(((intptr_t) px[i] & 0xffffffff), K); while(h[id]) { if(px[h[id]-1] == px[i]) { pans_i[i] = pans_i[h[id]-1]; // h[id]; goto sbl; } if(++id >= M) id %= M; //++id; id %= M; } h[id] = i + 1; pans_i[i] = ++g; sbl:; } } break; } Free(h); setAttrib(ans_i, install("N.groups"), ScalarInteger(g)); UNPROTECT(1); return ans_i; } // TODO: Only one M calculation // Think: If in the second grouping variable all entries are the same, you loop through the whole table for each value.. // TODO: Speed up for real values, i.e. system.time(group(DHSBR[1:2])) and system.time(group(wlddev)) (date), especially repeated real values appear slow !! // --> But also integers is slow, i.e. system.time(group(DHSBR[1:2])) when DHSBR[2] is integer. // ************************************************** // This function adds a second vector to the grouping // ************************************************** int dupVecSecond(int *pidx, int *pans_i, SEXP x, const int n, const int ng) { if(length(x) != n) error("Unequal length columns"); int K, tx = TYPEOF(x); size_t M; if (tx == INTSXP || tx == STRSXP || tx == REALSXP || tx == CPLXSXP ) { if(tx == INTSXP && isFactor(x) && (nlevels(x)+1) * ng <= 3 * n) { tx = 1000; M = (size_t)(nlevels(x)+1) * ng + 1; } else { const size_t n2 = 2U * (size_t) n; M = 256; K = 8; while (M < n2) { M *= 2; K++; } M += ng; // Here we addd the number of previous groups... } } else if (tx == LGLSXP) { M = (size_t)ng * 3 + 1; } else error("Type %s is not supported.", type2char(tx)); // # nocov int *h = (int*)Calloc(M, int); // Table to save the hash values, table has size M size_t id = 0, g = 0, hid = 0; switch (tx) { case LGLSXP: { const int *px = LOGICAL(x); for (int i = 0, j; i != n; ++i) { j = (px[i] == NA_LOGICAL) ? pidx[i] : pidx[i] + (px[i] + 1) * ng; if(h[j]) pans_i[i] = h[j]; else pans_i[i] = h[j] = ++g; } } break; case 1000: // This is for factors if feasible... { const int *px = INTEGER(x); for (int i = 0, j; i != n; ++i) { j = (px[i] == NA_INTEGER) ? pidx[i] : pidx[i] + px[i] * ng; if(h[j]) pans_i[i] = h[j]; else pans_i[i] = h[j] = ++g; } } break; // TODO: Think further about this! Perhaps you can also do this totally differently with a second vector capturing the unique values of idx! // See again what Morgan does to his matrix of single groupings... // Note: In general, combining bitwise i.e. px[i] ^ pidx[i] seems slightly faster than multiplying (px[i] * pidx[i])... case INTSXP: { const int *px = INTEGER(x); for (int i = 0; i != n; ++i) { id = HASH(px[i] * pidx[i], K) + pidx[i]; // Need multiplication here instead of bitwise, see your benchmark with 100 mio. obs where second group is just sample.int(1e4, 1e8, T), there bitwise is very slow!! while(h[id]) { hid = h[id]-1; if(px[hid] == px[i] && pidx[hid] == pidx[i]) { pans_i[i] = pans_i[hid]; goto ibl; } if(++id >= M) id %= M; // ++id; id %= M; // # nocov } h[id] = i + 1; pans_i[i] = ++g; // h[id]; ibl:; } } break; case REALSXP: { const double *px = REAL(x); union uno tpv; for (int i = 0; i != n; ++i) { tpv.d = px[i]; // R_IsNA(px[i]) ? NA_REAL : (R_IsNaN(px[i]) ? R_NaN :px[i]); id = HASH((tpv.u[0] + tpv.u[1]) ^ pidx[i], K) + pidx[i]; // Note: This is much faster than just adding pidx[i] to the hash value... while(h[id]) { // Problem: This value might be seen before, but not in combination with that pidx value... hid = h[id]-1; // The issue here is that REQUAL(px[hid], px[i]) could be true but pidx[hid] == pidx[i] fails, although the same combination of px and pidx could be seen earlier before... if(REQUAL(px[hid], px[i]) && pidx[hid] == pidx[i]) { pans_i[i] = pans_i[hid]; goto rbl; } if(++id >= M) id %= M; //++id; id %= M; } h[id] = i + 1; pans_i[i] = ++g; // h[id]; rbl:; } } break; case CPLXSXP: { const Rcomplex *px = COMPLEX(x); unsigned int u; union uno tpv; Rcomplex tmp; for (int i = 0; i != n; ++i) { tmp.r = (px[i].r == 0.0) ? 0.0 : px[i].r; tmp.i = (px[i].i == 0.0) ? 0.0 : px[i].i; if (C_IsNA(tmp)) { tmp.r = tmp.i = NA_REAL; } else if (C_IsNaN(tmp)) { tmp.r = tmp.i = R_NaN; } tpv.d = tmp.r; u = tpv.u[0] ^ tpv.u[1]; tpv.d = tmp.i; u ^= tpv.u[0] ^ tpv.u[1]; id = HASH(u ^ pidx[i], K) + pidx[i]; while(h[id]) { hid = h[id]-1; if(CEQUAL(px[hid], px[i]) && pidx[hid] == pidx[i]) { pans_i[i] = pans_i[hid]; goto cbl; } if(++id >= M) id %= M; //++id; id %= M; // # nocov } h[id] = i + 1; pans_i[i] = ++g; // h[id]; cbl:; } } break; case STRSXP: { const SEXP *px = STRING_PTR(x); for (int i = 0; i != n; ++i) { id = HASH(((intptr_t) px[i] & 0xffffffff) ^ pidx[i], K) + pidx[i]; while(h[id]) { hid = h[id]-1; if(px[hid] == px[i] && pidx[hid] == pidx[i]) { pans_i[i] = pans_i[hid]; goto sbl; } if(++id >= M) id %= M; //++id; id %= M; // # nocov } h[id] = i + 1; pans_i[i] = ++g; // h[id]; sbl:; } } break; } Free(h); // setAttrib(ans_i, sym_ng, ScalarInteger(g)); // UNPROTECT(1); // return ans_i; return g; } // ************************************************************************ // This function brings everything together for vectors or lists of vectors // ************************************************************************ SEXP groupVec(SEXP X, SEXP starts, SEXP sizes) { int l = length(X), islist = TYPEOF(X) == VECSXP, start = asLogical(starts), size = asLogical(sizes), nprotect = 0; // Better not exceptions to fundamental algorithms, when a couple of user-level functions return qG objects... // if(islist == 0 && OBJECT(X) != 0 && inherits(X, "qG") && inherits(X, "na.included")) return X; // return "qG" objects SEXP idx = islist ? dupVecIndex(VECTOR_ELT(X, 0)) : dupVecIndex(X); if(!(islist && l > 1) && start == 0 && size == 0) return idx; // l == 1 && PROTECT(idx); ++nprotect; SEXP sym_ng = PROTECT(install("N.groups")), res; ++nprotect; int ng = asInteger(getAttrib(idx, sym_ng)), n = length(idx); if(islist && l > 1) { SEXP ans = PROTECT(allocVector(INTSXP, n)); ++nprotect; int i = 1, *pidx = INTEGER(idx), *pans = INTEGER(ans); for( ; i < l; ++i) { if(ng == n) break; if(i % 2) { ng = dupVecSecond(pidx, pans, VECTOR_ELT(X, i), n, ng); } else { ng = dupVecSecond(pans, pidx, VECTOR_ELT(X, i), n, ng); } } res = i % 2 ? idx : ans; setAttrib(res, sym_ng, ScalarInteger(ng)); } else res = idx; // Cumpoting group starts and sizes attributes if(start || size) { PROTECT(res); ++nprotect; int *pres = INTEGER(res); if(start && size) { // Protect res ?? SEXP gs, st; setAttrib(res, install("starts"), st = allocVector(INTSXP, ng)); setAttrib(res, install("group.sizes"), gs = allocVector(INTSXP, ng)); int *pgs = INTEGER(gs), *pst = INTEGER(st); memset(pgs, 0, sizeof(int) * ng); --pgs; memset(pst, 0, sizeof(int) * ng); --pst; for(int i = 0; i != n; ++i) { ++pgs[pres[i]]; if(pst[pres[i]] == 0) pst[pres[i]] = i + 1; } } else if(start) { SEXP st; setAttrib(res, install("starts"), st = allocVector(INTSXP, ng)); int *pst = INTEGER(st), k = 0; memset(pst, 0, sizeof(int) * ng); --pst; for(int i = 0; i != n; ++i) { if(pst[pres[i]] == 0) { pst[pres[i]] = i + 1; if(++k == ng) break; } } } else { SEXP gs; setAttrib(res, install("group.sizes"), gs = allocVector(INTSXP, ng)); int *pgs = INTEGER(gs); memset(pgs, 0, sizeof(int) * ng); --pgs; for(int i = 0; i != n; ++i) ++pgs[pres[i]]; } } UNPROTECT(nprotect); return res; } // This version is only for atomic vectors (factor generation) SEXP groupAtVec(SEXP X, SEXP starts, SEXP naincl) { int start = asLogical(starts), nain = asLogical(naincl); // Note: These functions will give errors for unsupported types... SEXP idx = nain ? dupVecIndex(X) : dupVecIndexKeepNA(X); if(start == 0) return idx; PROTECT(idx); SEXP sym_ng = PROTECT(install("N.groups")); int ng = asInteger(getAttrib(idx, sym_ng)), n = length(idx); int *pidx = INTEGER(idx); SEXP st; setAttrib(idx, install("starts"), st = allocVector(INTSXP, ng)); int *pst = INTEGER(st), k = 0; memset(pst, 0, sizeof(int) * ng); --pst; if(nain) { for(int i = 0; i != n; ++i) { if(pst[pidx[i]] == 0) { pst[pidx[i]] = i + 1; if(++k == ng) break; } } } else { for(int i = 0; i != n; ++i) { if(pidx[i] != NA_INTEGER && pst[pidx[i]] == 0) { pst[pidx[i]] = i + 1; if(++k == ng) break; } } } UNPROTECT(2); return idx; } // From the kit package... /* * Data.Frame */ // SEXP dupDataFrameR(SEXP x) { // move to matrix if possible // // const SEXP *restrict px = SEXPPTR_RO(x); // const R_xlen_t len_x = xlength(x); // const R_xlen_t len_i = xlength(px[0]); // SEXP ans = R_NilValue; // SEXP mlv = PROTECT(allocMatrix(INTSXP, (int)len_i, (int)len_x)); // for (R_xlen_t i = 0; i < len_x; ++i) { // memcpy(INTEGER(mlv)+i*len_i, INTEGER(PROTECT(dupVecIndexOnlyR(px[i]))), (unsigned)len_i*sizeof(int)); // } // UNPROTECT((int)len_x); // const size_t n2 = 2U * (size_t) len_i; // size_t M = 256; // int K = 8; // while (M < n2) { // M *= 2; // K++; // } // R_xlen_t count = 0; // int *restrict h = (int*) Calloc(M, int); // const int *restrict v = INTEGER(mlv); // int *restrict pans = (int*) Calloc(len_i, int); // size_t id = 0; // // for (R_xlen_t i = 0; i < len_i; ++i) { // R_xlen_t key = 0; // for (R_xlen_t j = 0; j < len_x; ++j) { // key ^= HASH(v[i+j*len_i],K)*97*(j+1); // } // id = HASH(key, K); // while (h[id]) { // for (R_xlen_t j = 0; j < len_x; ++j) { // if (v[h[id]-1+j*len_i] != v[i+j*len_i]) { // goto label1; // } // } // goto label2; // label1:; // id++; id %= M; // } // h[id] = (int) i + 1; // pans[i]++; // count++; // label2:; // } // Free(h); // UNPROTECT(1); // SEXP indx = PROTECT(allocVector(INTSXP, count)); // int ct = 0; // int *restrict py = INTEGER(indx); // for (int i = 0; ct < count; ++i) { // if (pans[i]) { // py[ct++] = i; // } // } // SEXP output = PROTECT(subSetRowDataFrame(x, indx)); // Free(pans); // UNPROTECT(2); // return output; // } /* * Data.Frame */ // SEXP dupLenDataFrameR(SEXP x) { // const SEXP *restrict px = SEXPPTR_RO(x); // const R_xlen_t len_x = xlength(x); // // bool allT = true; // // const SEXPTYPE t0 = UTYPEOF(px[0]); // // for (int i = 1; i < len_x; ++i) { // // if (UTYPEOF(px[i]) != t0) { // // allT = false; // // break; // // } // // } // // if (allT) { // // SEXP output = PROTECT(dupLenMatrixR(PROTECT(dfToMatrix(x)))); // // UNPROTECT(2); // // return output; // // } // const R_xlen_t len_i = xlength(px[0]); // SEXP mlv = PROTECT(allocMatrix(INTSXP, (int)len_i, (int)len_x)); // for (R_xlen_t i = 0; i < len_x; ++i) { // memcpy(INTEGER(mlv)+i*len_i, INTEGER(PROTECT(dupVecIndexOnlyR(px[i], ScalarLogical(false)))), (unsigned)len_i*sizeof(int)); // } // UNPROTECT((int)len_x); // const size_t n2 = 2U * (size_t) len_i; // size_t M = 256; // int K = 8; // while (M < n2) { // M *= 2; // K++; // } // R_xlen_t count = 0; // int *restrict h = (int*) Calloc(M, int); // const int *restrict v = INTEGER(mlv); // size_t id = 0; // for (R_xlen_t i = 0; i < len_i; ++i) { // R_xlen_t key = 0; // for (R_xlen_t j = 0; j < len_x; ++j) { // key ^= HASH(v[i+j*len_i],K)*97*(j+1); // } // id = HASH(key, K); // while (h[id]) { // for (R_xlen_t j = 0; j < len_x; ++j) { // if (v[h[id]-1+j*len_i] != v[i+j*len_i]) { // goto label1; // } // } // goto label2; // label1:; // id++; id %= M; // } // h[id] = (int) i + 1; // count++; // label2:; // } // Free(h); // UNPROTECT(1); // return ScalarInteger(count); // } collapse/src/mrtl_mctl.cpp0000644000176200001440000002017013743473222015353 0ustar liggesusers// // [[Rcpp::plugins(cpp11)]] #include using namespace Rcpp; template List mrtlImpl(Matrix X, bool names, int ret) { int l = X.nrow(); List out(l); for(int i = l; i--; ) out[i] = X(i, _); if(names) { SEXP dn = Rf_getAttrib(X, R_DimNamesSymbol); if(dn == R_NilValue) dn = List::create(R_NilValue, R_NilValue); // should also work for plain matrices ! if(Rf_isNull(VECTOR_ELT(dn, 0))) { CharacterVector rn(l); std::string VS = std::string("V"); // faster ! for (int i = l; i--; ) rn[i] = VS + std::to_string(i+1); Rf_namesgets(out, rn); } else Rf_namesgets(out, VECTOR_ELT(dn, 0)); if(ret != 0) { if(Rf_isNull(VECTOR_ELT(dn, 1)) || ret == 2) { Rf_setAttrib(out, R_RowNamesSymbol, IntegerVector::create(NA_INTEGER, -X.ncol())); } else Rf_setAttrib(out, R_RowNamesSymbol, VECTOR_ELT(dn, 1)); if(ret == 1) { Rf_classgets(out, Rf_mkString("data.frame")); } else { Rf_classgets(out, CharacterVector::create("data.table","data.frame")); } } } else if (ret != 0) { CharacterVector rn(l); std::string VS = std::string("V"); // faster ! for (int i = l; i--; ) rn[i] = VS + std::to_string(i+1); Rf_namesgets(out, rn); Rf_setAttrib(out, R_RowNamesSymbol, IntegerVector::create(NA_INTEGER, -X.ncol())); if(ret == 1) { Rf_classgets(out, Rf_mkString("data.frame")); } else { Rf_classgets(out, CharacterVector::create("data.table","data.frame")); } } return out; } template <> List mrtlImpl(Matrix X, bool names, int ret) { stop("Not supported SEXP type!"); } template <> List mrtlImpl(Matrix X, bool names, int ret) { stop("Not supported SEXP type!"); } // [[Rcpp::export]] SEXP mrtl(const SEXP& X, bool names = false, int ret = 0){ RCPP_RETURN_MATRIX(mrtlImpl, X, names, ret); } template List mctlImpl(Matrix X, bool names, int ret) { int l = X.ncol(); List out(l); for(int i = l; i--; ) out[i] = X(_, i); if(names) { SEXP dn = Rf_getAttrib(X, R_DimNamesSymbol); if(dn == R_NilValue) dn = List::create(R_NilValue, R_NilValue); // should also work for plain matrices ! if(Rf_isNull(VECTOR_ELT(dn, 1))) { CharacterVector rn(l); std::string VS = std::string("V"); // faster ! for (int i = l; i--; ) rn[i] = VS + std::to_string(i+1); Rf_namesgets(out, rn); } else Rf_namesgets(out, VECTOR_ELT(dn, 1)); if(ret != 0) { if(Rf_isNull(VECTOR_ELT(dn, 0)) || ret == 2) { Rf_setAttrib(out, R_RowNamesSymbol, IntegerVector::create(NA_INTEGER, -X.nrow())); } else Rf_setAttrib(out, R_RowNamesSymbol, VECTOR_ELT(dn, 0)); if(ret == 1) { Rf_classgets(out, Rf_mkString("data.frame")); } else { Rf_classgets(out, CharacterVector::create("data.table","data.frame")); } } } else if (ret != 0) { CharacterVector rn(l); std::string VS = std::string("V"); // faster ! for (int i = l; i--; ) rn[i] = VS + std::to_string(i+1); Rf_namesgets(out, rn); Rf_setAttrib(out, R_RowNamesSymbol, IntegerVector::create(NA_INTEGER, -X.nrow())); if(ret == 1) { Rf_classgets(out, Rf_mkString("data.frame")); } else { Rf_classgets(out, CharacterVector::create("data.table","data.frame")); } } return out; } template <> List mctlImpl(Matrix X, bool names, int ret) { stop("Not supported SEXP type!"); } template <> List mctlImpl(Matrix X, bool names, int ret) { stop("Not supported SEXP type!"); } // [[Rcpp::export]] SEXP mctl(const SEXP& X, bool names = false, int ret = 0){ RCPP_RETURN_MATRIX(mctlImpl, X, names, ret); } // Experimental Matrix apply functions -> Need to make faster, see Hmisc::mApply // template // Slower than lapply(mctl...) // List mrtlapplyImpl(Matrix X, Function FUN, bool names, int ret) { // int l = X.nrow(); // List out(l); // for(int i = l; i--; ) { // MatrixRow Xi = X(i,_); // out[i] = FUN(Xi); // } // if(names && X.hasAttribute("dimnames")) { // List dn(2); // dn = X.attr("dimnames"); // if (Rf_isNull(dn[0])) { // CharacterVector rn(l); // for (int i = l; i--; ) { // rn[i] = std::string("V") + std::to_string(i+1); // } // out.attr("names") = rn; // } else out.attr("names") = dn[0]; // if (ret != 0) { // if (Rf_isNull(dn[1])) { // out.attr("row.names") = NumericVector::create(NA_REAL,-X.ncol()); // } else out.attr("row.names") = dn[1]; // if(ret == 1) { // out.attr("class") = "data.frame"; // } else { // out.attr("class") = CharacterVector::create("data.table","data.frame"); // } // } // } else if (ret != 0) { // CharacterVector rn(l); // for (int i = l; i--; ) { // rn[i] = std::string("V") + std::to_string(i+1); // } // out.attr("names") = rn; // out.attr("row.names") = NumericVector::create(NA_REAL,-X.ncol()); // if (ret == 1) { // out.attr("class") = "data.frame"; // } else { // out.attr("class") = CharacterVector::create("data.table","data.frame"); // } // } // return out; // } // template // Matrix mrtmapplyImpl(Matrix X, Function FUN) { // int l = X.nrow(); // Vector out0 = FUN(X(0,_)); // What if not same type ?? // int col = out0.size(); // Matrix out = no_init_matrix(l, col); // for(int i = 1; i != l; ++i) { // out(i,_) = FUN(X(i,_)); // } // if(X.ncol() == col) SHALLOW_DUPLICATE_ATTRIB(out, X); // else rownames(out) = rownames(X); // return out; // } // template // Slower than lapply(mctl...) // List mctlapplyImpl(Matrix X, Function FUN, bool names, int ret) { // int l = X.ncol(); // List out(l); // for(int i = l; i--; ) { // MatrixColumn Xi = X(_,i); // out[i] = FUN(Xi); // } // if(names && X.hasAttribute("dimnames")) { // List dn(2); // dn = X.attr("dimnames"); // if (Rf_isNull(dn[1])) { // CharacterVector cn(l); // for (int i = l; i--; ) { // cn[i] = std::string("V") + std::to_string(i+1); // } // out.attr("names") = cn; // } else out.attr("names") = dn[1]; // if (ret != 0) { // if (Rf_isNull(dn[0])) { // out.attr("row.names") = NumericVector::create(NA_REAL,-X.nrow()); // } else out.attr("row.names") = dn[0]; // if(ret == 1) { // out.attr("class") = "data.frame"; // } else { // out.attr("class") = CharacterVector::create("data.table","data.frame"); // } // } // } else if (ret != 0) { // CharacterVector cn(l); // for (int i = l; i--; ) { // cn[i] = std::string("V") + std::to_string(i+1); // } // out.attr("names") = cn; // out.attr("row.names") = NumericVector::create(NA_REAL,-X.nrow()); // if (ret == 1) { // out.attr("class") = "data.frame"; // } else { // out.attr("class") = CharacterVector::create("data.table","data.frame"); // } // } // return out; // } // template // Matrix mctmapplyImpl(Matrix X, Function FUN) { // int l = X.ncol(); // Vector out0 = FUN(X(_,0)); // What if not same type ?? // int row = out0.size(); // Matrix out = no_init_matrix(row, l); // for(int i = 1; i != l; ++i) { // NumericMatrix::Column outi = out(_,i); // outi = FUN(X(_,i)); // } // if(X.nrow() == row) SHALLOW_DUPLICATE_ATTRIB(out, X); // else colnames(out) = colnames(X); // return out; // } // // [[Rcpp::export]] // SEXP mrtlapply(SEXP X, Function FUN, bool names = false, int ret = 0){ // RCPP_RETURN_MATRIX(mrtlapplyImpl, X, FUN, names, ret); // } // // [[Rcpp::export]] // SEXP mrtmapply(SEXP X, Function FUN){ // RCPP_RETURN_MATRIX(mrtmapplyImpl, X, FUN); // } // // [[Rcpp::export]] // SEXP mctlapply(SEXP X, Function FUN, bool names = false, int ret = 0){ // RCPP_RETURN_MATRIX(mctlapplyImpl, X, FUN, names, ret); // } // // [[Rcpp::export]] // SEXP mctmapply(SEXP X, Function FUN){ // RCPP_RETURN_MATRIX(mctmapplyImpl, X, FUN); // } collapse/src/data.table_subset.c0000644000176200001440000007264314174223734016416 0ustar liggesusers/* This code is adapted from the data.table package: http://r-datatable.com and licensed under a Mozilla Public License 2.0 (MPL-2.0) license. */ #include "data.table.h" // selfref stuff is taken from data.tables assign.c static void finalizer(SEXP p) { SEXP x; R_len_t n, l, tl; if(!R_ExternalPtrAddr(p)) error("Internal error: finalizer hasn't received an ExternalPtr"); // # nocov p = R_ExternalPtrTag(p); if (!isString(p)) error("Internal error: finalizer's ExternalPtr doesn't see names in tag"); // # nocov l = LENGTH(p); tl = TRUELENGTH(p); if (l<0 || tl0 but 0 && tl < l) error("Internal error, please report (including result of sessionInfo()) to collapse issue tracker: tl (%d) < l (%d) but tl of class is marked.", tl, l); // # nocov if (tl > l+10000) warning("tl (%d) is greater than 10,000 items over-allocated (l = %d). If you didn't set the collapse_DT_alloccol option to be very large, please report to collapse issue tracker including the result of sessionInfo().",tl,l); // TODO: MAKE THIS WORK WITHOUT SHALLOW COPYING EVERY TIME !!! // if (n > tl) return shallow(dt, R_NilValue, n); // usual case (increasing alloc) // SEXP nam = PROTECT(getAttrib(dt, R_NamesSymbol)); // if(LENGTH(nam) != l) SETLENGTH(nam, l); // SET_TRUELENGTH(nam, n); // setselfref(dt); // better, otherwise may be invalid !! // UNPROTECT(1); // return(dt); } // #pragma GCC diagnostic ignored "-Wunknown-pragmas" // don't display this warning!! // https://stackoverflow.com/questions/1867065/how-to-suppress-gcc-warnings-from-library-headers?noredirect=1&lq=1 static void subsetVectorRaw(SEXP ans, SEXP source, SEXP idx, const bool anyNA) // Only for use by subsetDT() or subsetVector() below, hence static { const int n = length(idx); if (length(ans)!=n) error("Internal error: subsetVectorRaw length(ans)==%d n=%d", length(ans), n); const int *restrict idxp = INTEGER(idx); // anyNA refers to NA _in idx_; if there's NA in the data (source) that's just regular data to be copied // negatives, zeros and out-of-bounds have already been dealt with in convertNegAndZero so we can rely // here on idx in range [1,length(ans)]. // _Pragma("omp parallel for num_threads(getDTthreads())") (in PARLOOP below) // _Pragma("omp parallel for num_threads(getDTthreads())") #define PARLOOP(_NAVAL_) \ if (anyNA) { \ for (int i = 0; i != n; ++i) { \ int elem = idxp[i]; \ ap[i] = elem==NA_INTEGER ? _NAVAL_ : sp[elem-1]; \ } \ } else { \ for (int i = 0; i != n; ++i) { \ ap[i] = sp[idxp[i]-1]; \ } \ } // For small n such as 2,3,4 etc we hope OpenMP will be sensible inside it and not create a team with each thread doing just one item. Otherwise, // call overhead would be too high for highly iterated calls on very small subests. Timings were tested in #3175 // Futher, we desire (currently at least) to stress-test the threaded code (especially in latest R-devel) on small data to reduce chance that bugs // arise only over a threshold of n. switch(TYPEOF(source)) { case INTSXP: case LGLSXP: { int *sp = INTEGER(source), *ap = INTEGER(ans); PARLOOP(NA_INTEGER) } break; case REALSXP : { if (INHERITS(source, char_integer64)) { int64_t *sp = (int64_t *)REAL(source); int64_t *ap = (int64_t *)REAL(ans); PARLOOP(INT64_MIN) } else { double *sp = REAL(source), *ap = REAL(ans); PARLOOP(NA_REAL) } } break; case STRSXP : { // write barrier (assigning strings/lists) is not thread safe. Hence single threaded. // To go parallel here would need access to NODE_IS_OLDER, at least. Given gcgen, mark and named // are upper bounded and max 3, REFCNT==REFCNTMAX could be checked first and then critical SET_ if not. // Inside that critical just before SET_ it could check REFCNTmax since they should have been dealt with by convertNegAndZeroIdx() called ealier at R level. // single cache efficient sweep with prefetch, so very low priority to go parallel { if (!isInteger(idx)) error("Internal error. 'idx' is type '%s' not 'integer'", type2char(TYPEOF(idx))); // # nocov bool anyNA = false; // anyLess=false, // int last = INT32_MIN; int *idxp = INTEGER(idx), n = LENGTH(idx); for (int i = 0; i != n; ++i) { int elem = idxp[i]; if (elem<=0 && elem!=NA_INTEGER) return "Internal inefficiency: idx contains negatives or zeros. Should have been dealt with earlier."; // e.g. test 762 (TODO-fix) if (elem>max) return "Internal inefficiency: idx contains an item out-of-range. Should have been dealt with earlier."; // e.g. test 1639.64 anyNA |= elem==NA_INTEGER; // anyLess |= elem= 0.", max); // # nocov includes NA which will print as INT_MIN int *idxp = INTEGER(idx); bool stop = false; // #pragma omp parallel for num_threads(getDTthreads()) for (int i = 0; i != n; ++i) { if (stop) continue; int elem = idxp[i]; if ((elem<1 && elem!=NA_INTEGER) || elem>max) stop=true; } if (!stop) return(idx); // most common case to return early: no 0, no negative; all idx either NA or in range [1-max] // --------- // else massage the input to a standard idx where all items are either NA or in range [1,max] ... int countNeg=0, countZero=0, countNA=0, firstOverMax=0; for (int i = 0; i != n; ++i) { int elem = idxp[i]; if (elem==NA_INTEGER) countNA++; else if (elem<0) countNeg++; else if (elem==0) countZero++; else if (elem>max && firstOverMax==0) firstOverMax=i+1; } if (firstOverMax && LOGICAL(allowOverMax)[0]==FALSE) { error("i[%d] is %d which is out of range [1,nrow=%d]", firstOverMax, idxp[firstOverMax-1], max); } int countPos = n-countNeg-countZero-countNA; if (countPos && countNeg) { int i = 0, firstNeg=0, firstPos=0; while (i != n && (firstNeg==0 || firstPos==0)) { int elem = idxp[i]; if (firstPos==0 && elem>0) firstPos=i+1; if (firstNeg==0 && elem<0 && elem!=NA_INTEGER) firstNeg=i+1; i++; } error("Item %d of i is %d and item %d is %d. Cannot mix positives and negatives.", firstNeg, idxp[firstNeg-1], firstPos, idxp[firstPos-1]); } if (countNeg && countNA) { int i = 0, firstNeg=0, firstNA=0; while (i != n && (firstNeg==0 || firstNA==0)) { int elem = idxp[i]; if (firstNeg==0 && elem<0 && elem!=NA_INTEGER) firstNeg=i+1; if (firstNA==0 && elem==NA_INTEGER) firstNA=i+1; i++; } error("Item %d of i is %d and item %d is NA. Cannot mix negatives and NA.", firstNeg, idxp[firstNeg-1], firstNA); } SEXP ans; if (countNeg==0) { // just zeros to remove, or >max to convert to NA ans = PROTECT(allocVector(INTSXP, n - countZero)); int *ansp = INTEGER(ans); for (int i = 0, ansi = 0; i != n; ++i) { int elem = idxp[i]; if (elem==0) continue; ansp[ansi++] = elem>max ? NA_INTEGER : elem; } } else { // idx is all negative without any NA but perhaps some zeros bool *keep = (bool *)R_alloc(max, sizeof(bool)); // 4 times less memory that INTSXP in src/main/subscript.c for (int i = 0; i != max; ++i) keep[i] = true; int countRemoved=0, countDup=0, countBeyond=0; // idx=c(-10,-5,-10) removing row 10 twice int firstBeyond=0, firstDup=0; for (int i = 0; i != n; ++i) { int elem = -idxp[i]; if (elem==0) continue; if (elem>max) { countBeyond++; if (firstBeyond==0) firstBeyond=i+1; continue; } if (!keep[elem-1]) { countDup++; if (firstDup==0) firstDup=i+1; } else { keep[elem-1] = false; countRemoved++; } } if (countBeyond) warning("Item %d of i is %d but there are only %d rows. Ignoring this and %d more like it out of %d.", firstBeyond, idxp[firstBeyond-1], max, countBeyond-1, n); if (countDup) warning("Item %d of i is %d which removes that item but that has occurred before. Ignoring this dup and %d other dups.", firstDup, idxp[firstDup-1], countDup-1); int ansn = max-countRemoved; ans = PROTECT(allocVector(INTSXP, ansn)); int *ansp = INTEGER(ans); for (int i = 0, ansi = 0; i != max; ++i) { if (keep[i]) ansp[ansi++] = i+1; } } UNPROTECT(1); return ans; } static void checkCol(SEXP col, int colNum, int nrow, SEXP x) { if (isNull(col)) error("Column %d is NULL; malformed data.table.", colNum); if (isNewList(col) && INHERITS(col, char_dataframe)) { SEXP names = getAttrib(x, R_NamesSymbol); error("Column %d ['%s'] is a data.frame or data.table; malformed data.table.", colNum, isNull(names)?"":CHAR(STRING_ELT(names,colNum-1))); } if (length(col)!=nrow) { SEXP names = getAttrib(x, R_NamesSymbol); error("Column %d ['%s'] is length %d but column 1 is length %d; malformed data.table.", colNum, isNull(names)?"":CHAR(STRING_ELT(names,colNum-1)), length(col), nrow); } } /* helper */ SEXP extendIntVec(SEXP x, int len, int val) { SEXP out = PROTECT(allocVector(INTSXP, len + 1)); int *pout = INTEGER(out), *px = INTEGER(x); for(int i = len; i--; ) pout[i] = px[i]; pout[len] = val; UNPROTECT(1); return out; } /* subset columns of a list efficiently */ SEXP subsetCols(SEXP x, SEXP cols, SEXP checksf) { // SEXP fretall if(TYPEOF(x) != VECSXP) error("x is not a list."); int l = LENGTH(x), nprotect = 3, oxl = OBJECT(x) != 0; if(l == 0) return x; // ncol == 0 -> Nope, need emty selections such as cat_vars(mtcars) !! PROTECT_INDEX ipx; PROTECT_WITH_INDEX(cols = convertNegAndZeroIdx(cols, ScalarInteger(l), ScalarLogical(FALSE)), &ipx); int ncol = LENGTH(cols); int *pcols = INTEGER(cols); // if(ncol == 0 || (asLogical(fretall) && l == ncol)) return(x); // names SEXP nam = PROTECT(getAttrib(x, R_NamesSymbol)); // sf data frames: Need to add sf_column if(oxl && asLogical(checksf) && INHERITS(x, char_sf)) { int sfcoln = NA_INTEGER, sf_col_sel = 0; SEXP *pnam = STRING_PTR(nam), sfcol = asChar(getAttrib(x, sym_sf_column)); for(int i = l; i--; ) { if(pnam[i] == sfcol) { sfcoln = i + 1; break; } } if(sfcoln == NA_INTEGER) error("sf data frame has no attribute 'sf_column'"); for(int i = ncol; i--; ) { if(pcols[i] == sfcoln) { sf_col_sel = 1; break; } } if(sf_col_sel == 0) { REPROTECT(cols = extendIntVec(cols, ncol, sfcoln), ipx); ++ncol; pcols = INTEGER(cols); } } SEXP ans = PROTECT(allocVector(VECSXP, ncol)); SEXP *px = SEXPPTR(x), *pans = SEXPPTR(ans); for(int i = 0; i != ncol; ++i) { pans[i] = px[pcols[i]-1]; // SET_VECTOR_ELT(ans, i, VECTOR_ELT(x, pcols[i]-1)); } if(!isNull(nam)) { SEXP tmp = PROTECT(allocVector(STRSXP, ncol)); setAttrib(ans, R_NamesSymbol, tmp); subsetVectorRaw(tmp, nam, cols, /*anyNA=*/false); ++nprotect; } copyMostAttrib(x, ans); // includes row.names and class... // clear any index that was copied over by copyMostAttrib(), e.g. #1760 and #1734 (test 1678) // setAttrib(ans, sym_index, R_NilValue); -> deletes "index" attribute of pdata.frame -> don't use!! if(oxl && INHERITS(x, char_datatable)) { setAttrib(ans, sym_datatable_locked, R_NilValue); int n = asInteger(GetOption1(sym_collapse_DT_alloccol)); UNPROTECT(nprotect); // This needs to be here !! (asInteger and GetOption1 are allocating functions) return shallow(ans, R_NilValue, ncol + n); // 1024 is data.table default.. // setselfref(ans); // done by shallow } UNPROTECT(nprotect); return ans; } /* * subsetDT - Subsets a data.table * NOTE: * 1) 'rows' and 'cols' are 1-based, passed from R level * 2) Originally for subsetting vectors in fcast and now the beginnings of [.data.table ported to C * 3) Immediate need is for R 3.1 as lglVec[1] now returns R's global TRUE and we don't want := to change that global [think 1 row data.tables] * 4) Could do it other ways but may as well go to C now as we were going to do that anyway */ SEXP subsetDT(SEXP x, SEXP rows, SEXP cols, SEXP checkrows) { // , SEXP fastret int nprotect=0, oxl = OBJECT(x) != 0; if (!isNewList(x)) error("Internal error. Argument 'x' to CsubsetDT is type '%s' not 'list'", type2char(TYPEOF(rows))); // # nocov if (!length(x)) return x; // return empty list const int nrow = length(VECTOR_ELT(x, 0)); // if fast return, return data.table if all rows selected through positive indices... // if(asLogical(fastret) && nrow == LENGTH(rows) && INTEGER(rows)[0] > 0) { // if(LENGTH(cols) == length(x)) return x; // return subsetCols(x, cols); // } // check index once up front for 0 or NA, for branchless subsetVectorRaw which is repeated for each column bool anyNA=false; // , orderedSubset=true; // true for when rows==null (meaning all rows) if (asLogical(checkrows) && !isNull(rows) && check_idx(rows, nrow, &anyNA)!=NULL) { // , &orderedSubset SEXP max = PROTECT(ScalarInteger(nrow)); nprotect++; rows = PROTECT(convertNegAndZeroIdx(rows, max, ScalarLogical(TRUE))); nprotect++; const char *err = check_idx(rows, nrow, &anyNA); // , &orderedSubset if (err!=NULL) error(err); } if (!isInteger(cols)) error("Internal error. Argument 'cols' to Csubset is type '%s' not 'integer'", type2char(TYPEOF(cols))); // # nocov int ncol = LENGTH(cols), l = LENGTH(x), *pcols = INTEGER(cols); for (int i = 0; i != ncol; ++i) { if (pcols[i] < 1 || pcols[i] > l) error("Item %d of 'cols' is %d which is outside 1-based range [1,ncol(x)=%d]", i+1, pcols[i], l); } // Adding sf geometry column if not already selected... if(oxl && INHERITS(x, char_sf)) { int sfcoln = NA_INTEGER, sf_col_sel = 0; SEXP nam = PROTECT(getAttrib(x, R_NamesSymbol)); SEXP *pnam = STRING_PTR(nam), sfcol = asChar(getAttrib(x, sym_sf_column)); for(int i = l; i--; ) { if(pnam[i] == sfcol) { sfcoln = i + 1; break; } } UNPROTECT(1); if(sfcoln == NA_INTEGER) error("sf data frame has no attribute 'sf_column'"); for(int i = ncol; i--; ) { if(pcols[i] == sfcoln) { sf_col_sel = 1; break; } } if(sf_col_sel == 0) { cols = PROTECT(extendIntVec(cols, LENGTH(cols), sfcoln)); ++ncol; ++nprotect; pcols = INTEGER(cols); } } // int overAlloc = 1024; // checkOverAlloc(GetOption(install("datatable.alloccol"), R_NilValue)); SEXP ans = PROTECT(allocVector(VECSXP, ncol)); nprotect++; // +overAlloc // doing alloc.col directly here; eventually alloc.col can be deprecated. // user-defined and superclass attributes get copied as from v1.12.0 copyMostAttrib(x, ans); // most means all except R_NamesSymbol, R_DimSymbol and R_DimNamesSymbol // includes row.names (oddly, given other dims aren't) and "sorted" dealt with below // class is also copied here which retains superclass name in class vector as has been the case for many years; e.g. tests 1228.* for #5296 // This is because overalloc.. creating columns by reference stuff.. // SET_TRUELENGTH(ans, LENGTH(ans)); // SETLENGTH(ans, LENGTH(cols)); int ansn; SEXP *px = SEXPPTR(x), *pans = SEXPPTR(ans); if (isNull(rows)) { ansn = nrow; for (int i = 0; i != ncol; ++i) { SEXP thisCol = px[pcols[i]-1]; checkCol(thisCol, pcols[i], nrow, x); pans[i] = thisCol; // copyAsPlain(thisCol) -> No deep copy // materialize the column subset as we have always done for now, until REFCNT is on by default in R (TODO) } } else { ansn = LENGTH(rows); // has been checked not to contain zeros or negatives, so this length is the length of result for (int i = 0; i != ncol; ++i) { SEXP source = px[pcols[i]-1]; checkCol(source, pcols[i], nrow, x); SEXP target; SET_VECTOR_ELT(ans, i, target = allocVector(TYPEOF(source), ansn)); copyMostAttrib(source, target); subsetVectorRaw(target, source, rows, anyNA); // parallel within column } } SEXP tmp = PROTECT(allocVector(STRSXP, ncol)); nprotect++; // SET_TRUELENGTH(tmp, LENGTH(tmp)); // SETLENGTH(tmp, LENGTH(cols)); setAttrib(ans, R_NamesSymbol, tmp); subsetVectorRaw(tmp, getAttrib(x, R_NamesSymbol), cols, /*anyNA=*/false); if(oxl) { tmp = PROTECT(allocVector(INTSXP, 2)); nprotect++; INTEGER(tmp)[0] = NA_INTEGER; INTEGER(tmp)[1] = -ansn; setAttrib(ans, R_RowNamesSymbol, tmp); // The contents of tmp must be set before being passed to setAttrib(). setAttrib looks at tmp value and copies it in the case of R_RowNamesSymbol. Caused hard to track bug around 28 Sep 2014. // clear any index that was copied over by copyMostAttrib() above, e.g. #1760 and #1734 (test 1678) setAttrib(ans, sym_index, R_NilValue); // also ok for pdata.frame (can't use on subsetted or ordered data frame) } if(oxl && INHERITS(x, char_datatable)) { setAttrib(ans, sym_sorted, R_NilValue); setAttrib(ans, sym_datatable_locked, R_NilValue); int n = asInteger(GetOption1(sym_collapse_DT_alloccol)); UNPROTECT(nprotect); // This needs to be here !! (asInteger and GetOption1 are allocating functions) return shallow(ans, R_NilValue, ncol + n); // 1024 is data.table default.. // setselfref(ans); // done by shallow } UNPROTECT(nprotect); return ans; } SEXP subsetVector(SEXP x, SEXP idx, SEXP checkidx) { // idx is 1-based passed from R level bool anyNA = false; //, orderedSubset=false; int nprotect=0; if (isNull(x)) error("Internal error: NULL can not be subset. It is invalid for a data.table to contain a NULL column."); // # nocov if (asLogical(checkidx) && check_idx(idx, length(x), &anyNA) != NULL) // , &orderedSubset error("Internal error: CsubsetVector is internal-use-only but has received negatives, zeros or out-of-range"); // # nocov SEXP ans = PROTECT(allocVector(TYPEOF(x), length(idx))); nprotect++; copyMostAttrib(x, ans); subsetVectorRaw(ans, x, idx, anyNA); UNPROTECT(nprotect); return ans; } collapse/src/seqid_groupid.cpp0000644000176200001440000004136213767070357016232 0ustar liggesusers// [[Rcpp::plugins(cpp11)]] #include using namespace Rcpp; // TODO: Optimize ! // TODO: can do something about doubles using == ? // TODO: Option na_fill ? // Note: For x[i] == NA_INTEGER, which is equal to INT_MIN, cannot calculate x[i]-prev ! -> fixed in 1.2.1 // https://stackoverflow.com/questions/776624/whats-faster-iterating-an-stl-vector-with-vectoriterator-or-with-at // [[Rcpp::export]] IntegerVector seqid(const IntegerVector& x, const SEXP& o = R_NilValue, int del = 1, int start = 1, bool na_skip = false, bool skip_seq = false, bool check_o = true) { int l = x.size(), id = start, prev; if(l < 1) return x; // Prevents seqfault for numeric(0) #101 IntegerVector out = no_init_vector(l); if(Rf_isNull(o)) { if(na_skip) { int j = 0, end = l-1; while(x[j] == NA_INTEGER && j != end) out[j++] = NA_INTEGER; if(j != end) { prev = x[j]; out[j] = id; for(int i = j+1; i != l; ++i) { if(x[i] != NA_INTEGER) { if(x[i] - prev != del) ++id; // x[i]-x[i-1]? prev = x[i]; out[i] = id; } else { // Faster way ? out[i] = NA_INTEGER; if(skip_seq) prev += del; } } } } else { int nafill = INT_MAX - 1e7; prev = x[0]; if(prev == NA_INTEGER) prev = nafill; out[0] = id; for(int i = 1; i != l; ++i) { if(x[i] == NA_INTEGER) { ++id; prev = nafill; } else { if(x[i] - prev != del) ++id; prev = x[i]; } out[i] = id; } } } else { IntegerVector oo = o; if(oo.size() != l) stop("length(o) must match length(x)"); int val(oo[0]-1); if(val < 0 || val >= l) stop("o out of allowed range [1, length(x)]"); if(na_skip) { int j = 0, end = l-1; if(check_o) { while(x[val] == NA_INTEGER && j != end) { out[val] = NA_INTEGER; val = oo[++j]-1; if(val < 0 || val >= l) stop("o out of allowed range [1, length(x)]"); } if(j != end) { prev = x[val]; out[val] = id; for(int i = j+1; i != l; ++i) { val = oo[i]-1; if(val < 0 || val >= l) stop("o out of allowed range [1, length(x)]"); if(x[val] != NA_INTEGER) { if(x[val] - prev != del) ++id; // x[i]-x[i-1]? prev = x[val]; out[val] = id; } else { out[val] = NA_INTEGER; if(skip_seq) prev += del; } } } } else { while(x[val] == NA_INTEGER && j != end) { out[val] = NA_INTEGER; val = oo[++j]-1; } if(j != end) { prev = x[val]; out[val] = id; for(int i = j+1; i != l; ++i) { val = oo[i]-1; if(x[val] != NA_INTEGER) { if(x[val] - prev != del) ++id; // x[i]-x[i-1]? prev = x[val]; out[val] = id; } else { out[val] = NA_INTEGER; if(skip_seq) prev += del; } } } } } else { int nafill = INT_MAX - 1e7; prev = x[val]; if(prev == NA_INTEGER) prev = nafill; out[val] = id; // faster than iterator ? if(check_o) { for(int i = 1; i != l; ++i) { // for(IntegerVector::iterator it = oo.begin()+1, end = oo.end(); it != end; ++it) { val = *it-1; val = oo[i]-1; if(val < 0 || val >= l) stop("o out of allowed range [1, length(x)]"); if(x[val] == NA_INTEGER) { ++id; prev = nafill; } else { if(x[val] - prev != del) ++id; prev = x[val]; } out[val] = id; } } else { for(int i = 1; i != l; ++i) { // for(IntegerVector::iterator it = oo.begin()+1, end = oo.end(); it != end; ++it) { val = *it-1; val = oo[i]-1; if(x[val] == NA_INTEGER) { ++id; prev = nafill; } else { if(x[val] - prev != del) ++id; prev = x[val]; } out[val] = id; } } } } out.attr("N.groups") = id - start + 1; if(start == 1) Rf_classgets(out, na_skip ? CharacterVector::create("qG") : CharacterVector::create("qG", "na.included")); return out; } // TODO: Make unique argument and generalize to all vector input types !! Or starts ?? -> Nah, GRP already does that. need to think harder. First publish without.. // The problem with groups or starts is also that you either have to dynamically fill a vector or do a second iteration... // Rather have it process starts attribute from radixorder... template IntegerVector groupidImpl(Vector x, SEXP o, int start, bool na_skip, bool check_o) { int l = x.size(), id = start; if(l < 1) return IntegerVector(0); // Prevents seqfault for numeric(0) #101 typedef typename Rcpp::traits::storage_type::type storage_t; auto isnanT = (RTYPE == REALSXP) ? [](storage_t x) { return x != x; } : [](storage_t x) { return x == Vector::get_na(); }; storage_t prev; IntegerVector out = no_init_vector(l); if(Rf_isNull(o)) { if(na_skip) { int j = 0, end = l-1; while(isnanT(x[j]) && j != end) out[j++] = NA_INTEGER; if(j != end) { prev = x[j]; out[j] = id; for(int i = j+1; i != l; ++i) { if(!isnanT(x[i])) { if(x[i] != prev) { ++id; prev = x[i]; } out[i] = id; } else out[i] = NA_INTEGER; } } } else { prev = x[0]; out[0] = id; if(RTYPE == REALSXP) { for(int i = 1; i != l; ++i) { if(x[i] != prev) { if(!(prev != prev && isnanT(x[i]))) ++id; prev = x[i]; } out[i] = id; } } else { for(int i = 1; i != l; ++i) { if(x[i] != prev) { ++id; prev = x[i]; } out[i] = id; } } } } else { IntegerVector oo = o; if(oo.size() != l) stop("length(o) must match length(x)"); int val(oo[0]-1); if(val < 0 || val >= l) stop("o out of allowed range [1, length(x)]"); if(na_skip) { int j = 0, end = l-1; if(check_o) { while(isnanT(x[val]) && j != end) { out[val] = NA_INTEGER; val = oo[++j]-1; if(val < 0 || val >= l) stop("o out of allowed range [1, length(x)]"); } if(j != end) { prev = x[val]; out[val] = id; for(int i = j+1; i != l; ++i) { val = oo[i]-1; if(val < 0 || val >= l) stop("o out of allowed range [1, length(x)]"); if(!isnanT(x[val])) { if(x[val] != prev) { ++id; prev = x[val]; } out[val] = id; } else out[val] = NA_INTEGER; } } } else { while(isnanT(x[val]) && j != end) { out[val] = NA_INTEGER; val = oo[++j]-1; } if(j != end) { prev = x[val]; out[val] = id; for(int i = j+1; i != l; ++i) { val = oo[i]-1; if(!isnanT(x[val])) { if(x[val] != prev) { ++id; prev = x[val]; } out[val] = id; } else out[val] = NA_INTEGER; } } } } else { prev = x[val]; out[val] = id; // faster than iterator ? if(RTYPE == REALSXP) { if(check_o) { for(int i = 1; i != l; ++i) { // for(IntegerVector::iterator it = oo.begin()+1, end = oo.end(); it != end; ++it) { val = *it-1; val = oo[i]-1; if(val < 0 || val >= l) stop("o out of allowed range [1, length(x)]"); if(x[val] != prev) { if(!(prev != prev && isnanT(x[val]))) ++id; prev = x[val]; } out[val] = id; } } else { for(int i = 1; i != l; ++i) { // for(IntegerVector::iterator it = oo.begin()+1, end = oo.end(); it != end; ++it) { val = *it-1; val = oo[i]-1; if(x[val] != prev) { if(!(prev != prev && isnanT(x[val]))) ++id; prev = x[val]; } out[val] = id; } } } else { if(check_o) { for(int i = 1; i != l; ++i) { // for(IntegerVector::iterator it = oo.begin()+1, end = oo.end(); it != end; ++it) { val = *it-1; val = oo[i]-1; if(val < 0 || val >= l) stop("o out of allowed range [1, length(x)]"); if(x[val] != prev) { ++id; prev = x[val]; } out[val] = id; } } else { for(int i = 1; i != l; ++i) { // for(IntegerVector::iterator it = oo.begin()+1, end = oo.end(); it != end; ++it) { val = *it-1; val = oo[i]-1; if(x[val] != prev) { ++id; prev = x[val]; } out[val] = id; } } } } } out.attr("N.groups") = id - start + 1; if(start == 1) Rf_classgets(out, na_skip ? CharacterVector::create("qG") : CharacterVector::create("qG", "na.included")); return out; } template <> IntegerVector groupidImpl(Vector x, SEXP o, int start, bool na_skip, bool check_o) { stop("Not supported SEXP type!"); } template <> IntegerVector groupidImpl(Vector x, SEXP o, int start, bool na_skip, bool check_o) { stop("Not supported SEXP type!"); } template <> IntegerVector groupidImpl(Vector x, SEXP o, int start, bool na_skip, bool check_o) { stop("Not supported SEXP type!"); } template <> IntegerVector groupidImpl(Vector x, SEXP o, int start, bool na_skip, bool check_o) { stop("Not supported SEXP type!"); } // [[Rcpp::export]] IntegerVector groupid(const SEXP& x, const SEXP& o = R_NilValue, int start = 1, bool na_skip = false, bool check_o = true) { RCPP_RETURN_VECTOR(groupidImpl, x, o, start, na_skip, check_o); } // Integer Version // // [[Rcpp::export]] // IntegerVector groupid(const IntegerVector& x, const SEXP& o = R_NilValue, int start = 1, // bool na_skip = false, bool check_o = true) { // int l = x.size(), prev, id = start; // IntegerVector out = no_init_vector(l); // if(Rf_isNull(o)) { // if(na_skip) { // int j = 0, end = l-1; // while(x[j] == NA_INTEGER && j != end) out[j++] = NA_INTEGER; // if(j != end) { // prev = x[j]; // out[j] = id; // for(int i = j+1; i != l; ++i) { // if(x[i] != NA_INTEGER) { // if(x[i] != prev) { // ++id; // prev = x[i]; // } // out[i] = id; // } else out[i] = NA_INTEGER; // } // } // } else { // prev = x[0]; // out[0] = id; // for(int i = 1; i != l; ++i) { // if(x[i] != prev) { // ++id; // prev = x[i]; // } // out[i] = id; // } // } // } else { // IntegerVector oo = o; // int val(oo[0]-1); // if(val < 0 || val >= l) stop("o out of allowed range [1, length(x)]"); // if(na_skip) { // int j = 0, end = l-1; // if(check_o) { // while(x[val] == NA_INTEGER && j != end) { // out[val] = NA_INTEGER; // val = oo[++j]-1; // if(val < 0 || val >= l) stop("o out of allowed range [1, length(x)]"); // } // if(j != end) { // prev = x[val]; // out[val] = id; // for(int i = j+1; i != l; ++i) { // val = oo[i]-1; // if(val < 0 || val >= l) stop("o out of allowed range [1, length(x)]"); // if(x[val] != NA_INTEGER) { // if(x[val] != prev) { // ++id; // prev = x[val]; // } // out[val] = id; // } else out[val] = NA_INTEGER; // } // } // } else { // while(x[val] == NA_INTEGER && j != end) { // out[val] = NA_INTEGER; // val = oo[++j]-1; // } // if(j != end) { // prev = x[val]; // out[val] = id; // for(int i = j+1; i != l; ++i) { // val = oo[i]-1; // if(x[val] != NA_INTEGER) { // if(x[val] != prev) { // ++id; // prev = x[val]; // } // out[val] = id; // } else out[val] = NA_INTEGER; // } // } // } // } else { // prev = x[val]; // out[val] = id; // faster than iterator ?? // if(check_o) { // for(int i = 1; i != l; ++i) { // for(IntegerVector::iterator it = oo.begin()+1, end = oo.end(); it != end; ++it) { val = *it-1; // val = oo[i]-1; // if(val < 0 || val >= l) stop("o out of allowed range [1, length(x)]"); // if(x[val] != prev) { // ++id; // prev = x[val]; // } // out[val] = id; // } // } else { // for(int i = 1; i != l; ++i) { // for(IntegerVector::iterator it = oo.begin()+1, end = oo.end(); it != end; ++it) { val = *it-1; // val = oo[i]-1; // if(x[val] != prev) { // ++id; // prev = x[val]; // } // out[val] = id; // } // } // } // } // out.attr("N.groups") = id; // out.attr("class") = na_skip ? "qG" : CharacterVector::create("qG", "na.included"); // return out; // } // // Simple first versions // // // [[Rcpp::export]] // IntegerVector groupid(const IntegerVector& x, const SEXP& o = R_NilValue, bool check = true) { // int l = x.size(), prev, id = 1; // IntegerVector out = no_init_vector(l); // if(Rf_isNull(o)) { // prev = x[0]; // out[0] = 1; // for(int i = 1; i != l; ++i) { // if(x[i] != prev) { // ++id; // prev = x[i]; // } // out[i] = id; // } // } else { // IntegerVector oo = o; // int val(oo[0]-1); // prev = x[val]; // https://stackoverflow.com/questions/776624/whats-faster-iterating-an-stl-vector-with-vectoriterator-or-with-at // out[val] = 1; // faster than iterator ?? // if(check) { // for(int i = 1; i != l; ++i) { // for(IntegerVector::iterator it = oo.begin()+1, end = oo.end(); it != end; ++it) { val = *it-1; // val = oo[i]-1; // if(val < 0 || val >= l) stop("o out of allowed range [1, length(x)]"); // if(x[val] != prev) { // ++id; // prev = x[val]; // } // out[val] = id; // } // } else { // for(int i = 1; i != l; ++i) { // for(IntegerVector::iterator it = oo.begin()+1, end = oo.end(); it != end; ++it) { val = *it-1; // val = oo[i]-1; // if(x[val] != prev) { // ++id; // prev = x[val]; // } // out[val] = id; // } // } // } // out.attr("N.groups") = id; // out.attr("class") = CharacterVector::create("qG","na.included"); // return out; // } // // // [[Rcpp::export]] // IntegerVector groupid(const IntegerVector& x, const SEXP& o = R_NilValue, bool check = true) { // int l = x.size(), prev, id = 1; // IntegerVector out = no_init_vector(l); // if(Rf_isNull(o)) { // prev = x[0]; // out[0] = 1; // for(int i = 1; i != l; ++i) { // if(x[i] != prev) { // ++id; // prev = x[i]; // } // out[i] = id; // } // } else { // IntegerVector oo = o; // int val(oo[0]-1); // prev = x[val]; // https://stackoverflow.com/questions/776624/whats-faster-iterating-an-stl-vector-with-vectoriterator-or-with-at // out[val] = 1; // faster than iterator ?? // if(check) { // for(int i = 1; i != l; ++i) { // for(IntegerVector::iterator it = oo.begin()+1, end = oo.end(); it != end; ++it) { val = *it-1; // val = oo[i]-1; // if(val < 0 || val >= l) stop("o out of allowed range [1, length(x)]"); // if(x[val] != prev) { // ++id; // prev = x[val]; // } // out[val] = id; // } // } else { // for(int i = 1; i != l; ++i) { // for(IntegerVector::iterator it = oo.begin()+1, end = oo.end(); it != end; ++it) { val = *it-1; // val = oo[i]-1; // if(x[val] != prev) { // ++id; // prev = x[val]; // } // out[val] = id; // } // } // } // out.attr("N.groups") = id; // out.attr("class") = CharacterVector::create("qG","na.included"); // return out; // } collapse/src/kit.h0000644000176200001440000001120214170236467013611 0ustar liggesusers/* This code is adapted from the kit package: https://github.com/2005m/kit and licensed under a GPL-3.0 license. */ #include // #include // #include // #if !defined(R_VERSION) || R_VERSION < R_Version(3, 5, 0) // #define USE_RINTERNALS // #define DATAPTR_RO(x) ((const void *)DATAPTR(x)) // #endif #include // #include #include // needed for intptr_t on linux // #include // #ifdef _OPENMP // #include // #define omp_enabled true // #define max_thread omp_get_num_procs() // #define min_thread 1 // #define OMP_PARALLEL_FOR(nth) _Pragma("omp parallel for num_threads(nth)") // #else // #define omp_enabled false // #define max_thread 1 // #define min_thread 1 // #define omp_get_thread_num() 0 // #define OMP_PARALLEL_FOR(n) // #endif // #if !defined SSIZE_MAX // #define SSIZE_MAX LLONG_MAX // #endif // #define UTYPEOF(x) ((unsigned)TYPEOF(x)) // #define IS_BOOL(x) (LENGTH(x)==1 && TYPEOF(x)==LGLSXP && LOGICAL(x)[0]!=NA_LOGICAL) // #define IS_VALID_TYPE(x) ((x) == LGLSXP || (x)==INTSXP || (x)==REALSXP || (x)==CPLXSXP || (x)==STRSXP || (x)==VECSXP) // #define PTR_ETL(x, y) (((const SEXP *)DATAPTR_RO(x))[y]) // #define SEXPPTR_RO(x) ((const SEXP *)DATAPTR_RO(x)) // #define ISNA_COMPLEX(x) (ISNA(x.r) || ISNA(x.i)) // #define ISNAN_COMPLEX(x) (ISNAN(x.r) || ISNAN(x.i)) // #define EQUAL_CPLX(x, y) (((x.r) == (y.r)) && ((x.i) == (y.i))) // #define RCHAR(x, y) CHAR(STRING_ELT(x, y)) // #define SEXP_F ScalarLogical(FALSE) // #define SEXP_T ScalarLogical(TRUE) #define HASH(key, K) (3141592653U * (unsigned int)(key) >> (32 - (K))) #define N_ISNAN(x, y) (!ISNAN(x) && !ISNAN(y)) #define B_IsNA(x, y) (R_IsNA(x) && R_IsNA(y)) #define B_IsNaN(x, y) (R_IsNaN(x) && R_IsNaN(y)) #define B_ISNAN(x, y) (ISNAN(x) && ISNAN(y)) #define C_IsNA(x) (R_IsNA(x.r) || R_IsNA(x.i)) #define C_IsNaN(x) (R_IsNaN(x.r) || R_IsNaN(x.i)) #define C_ISNAN(x, y) (B_ISNAN(x, y) || (N_ISNAN(x, y) && x == y)) #define REQUAL(x, y) (N_ISNAN(x, y) ? (x == y) : (B_IsNA(x, y) || B_IsNaN(x, y))) #define CEQUAL(x, y) ((N_ISNAN(x.r, x.i) && N_ISNAN(y.r, y.i)) ? (x.r == y.r && x.i == y.i) : (C_IsNA(x) ? C_IsNA(y) : (C_IsNA(y) ? 0 : (C_ISNAN(x.r, y.r) && C_ISNAN(x.i, y.i))))) // #define STR_DF mkString("data.frame") // #define MAX(a,b) (((a)>(b))?(a):(b)) // #define IS_LOGICAL(x) (isLogical(x) && LENGTH(x)==1) // extern SEXP addColToDataFrame(SEXP df, SEXP mcol, SEXP coln); // extern SEXP callToOrder (SEXP x, const char* method, bool desc, Rboolean na, SEXP env); // extern SEXP charToFactR(SEXP x, SEXP decreasingArg, SEXP nthread, SEXP nalast, SEXP env, SEXP addNA); // extern SEXP countR(SEXP x, SEXP y); // extern SEXP countNAR(SEXP x); // extern SEXP countOccurR(SEXP x); // extern SEXP countOccurDataFrameR(SEXP x); // extern SEXP cpsortR(SEXP x, SEXP decreasing, SEXP nthread, SEXP nalast, SEXP env, SEXP index, SEXP clocale); // extern SEXP dfToMatrix(SEXP df); // extern SEXP dupR(SEXP x, SEXP uniq, SEXP fromLast); // extern SEXP dupVecR(SEXP x, SEXP uniq, SEXP fromLast); // extern SEXP dupVecIndexOnlyR(SEXP x); // extern SEXP dupDataFrameR(SEXP x, SEXP uniq, SEXP fromLast); // extern SEXP dupMatrixR(SEXP x, SEXP uniq, Rboolean idx, SEXP fromLast); // extern SEXP dupLenR(SEXP x); // extern SEXP dupLenDataFrameR(SEXP x); // extern SEXP dupLenMatrixR(SEXP x); // extern SEXP dupLenVecR(SEXP x); // extern SEXP fposR(SEXP needle, SEXP haystack, SEXP all, SEXP overlap); // extern SEXP fposMatR(SEXP needle, SEXP haystack, SEXP all, SEXP overlap); // extern SEXP fposVectR(SEXP ndle, SEXP hsk, SEXP all, SEXP overlap); // extern SEXP iifR(SEXP l, SEXP a, SEXP b, SEXP na, SEXP tprom, SEXP nthreads); // extern SEXP nifR(SEXP na, SEXP rho, SEXP args); // extern SEXP nifInternalR(SEXP na, SEXP rho, SEXP args); // extern SEXP nswitchR(SEXP x, SEXP na, SEXP nthreads, SEXP chkenc, SEXP args); // extern SEXP ompEnabledR(); // extern SEXP pallR(SEXP na, SEXP args); // extern SEXP panyR(SEXP na, SEXP args); // extern SEXP pcountR(SEXP x, SEXP args); // extern SEXP pmeanR(SEXP na, SEXP args); // extern SEXP pprodR(SEXP na, SEXP args); // extern SEXP psumR(SEXP na, SEXP args); // extern SEXP setlevelsR(SEXP x, SEXP old_lvl, SEXP new_lvl, SEXP skip_absent); // extern SEXP subSetColDataFrame(SEXP df, SEXP str); // extern SEXP subSetColMatrix(SEXP x, R_xlen_t idx); // extern SEXP subSetRowDataFrame(SEXP df, SEXP rws); // extern SEXP subSetRowMatrix(SEXP mat, SEXP rws); // extern SEXP topnR(SEXP vec, SEXP n, SEXP dec, SEXP hasna, SEXP env); // extern SEXP vswitchR(SEXP x, SEXP values, SEXP outputs, SEXP na, SEXP nthreads, SEXP chkenc); union uno { double d; unsigned int u[2]; }; // bool isMixEnc(SEXP x); // SEXP enc2UTF8(SEXP x); collapse/src/fbstats.cpp0000644000176200001440000012320114174223734015023 0ustar liggesusers// [[Rcpp::plugins(cpp11)]] #include using namespace Rcpp; // TODO: Still check printing (naming and setting classes) options // inline bool isnan2(double x) { // return x != x; // } // use constant references on the temp function also ? NumericVector fbstatstemp(NumericVector x, bool ext = false, int ng = 0, IntegerVector g = 0, SEXP w = R_NilValue, bool setn = true, bool stable_algo = true, SEXP gn = R_NilValue) { int l = x.size(); if(!ext) { if(ng == 0) { // No groups int j = l-1; // double n = 0, min = R_PosInf, max = R_NegInf; // long double mean = 0, d1 = 0, M2 = 0; double n = 0, min = R_PosInf, max = R_NegInf, mean = 0, d1 = 0, M2 = 0; if(Rf_isNull(w)) { // No weights while(std::isnan(x[j]) && j!=0) --j; if(j != 0) { // if(j == 0) stop("Not enough non-mising obs."); if(stable_algo) { for(int i = j+1; i--; ) { if(std::isnan(x[i])) continue; d1 = x[i]-mean; mean += d1 * (1 / ++n); M2 += d1*(x[i]-mean); if(min > x[i]) min = x[i]; if(max < x[i]) max = x[i]; } M2 = sqrt(M2/(n-1)); } else { int k = 0; long double sum = 0, sq_sum = 0; for(int i = j+1; i--; ) { d1 = x[i]; if(std::isnan(d1)) continue; sum += d1; sq_sum += d1 * d1; if(min > d1) min = d1; if(max < d1) max = d1; ++k; } sum /= k; sq_sum -= sum*sum*k; M2 = (double)sqrt(sq_sum/(k-1)); n = (double)k; mean = (double)sum; } } else mean = M2 = min = max = NA_REAL; } else { // with weights NumericVector wg = w; if(l != wg.size()) stop("length(w) must match length(x)"); // long double sumw = 0; double sumw = 0; while((std::isnan(x[j]) || std::isnan(wg[j]) || wg[j] == 0) && j!=0) --j; if(j != 0) { // if(j == 0) stop("Not enough non-mising obs."); for(int i = j+1; i--; ) { if(std::isnan(x[i]) || std::isnan(wg[i]) || wg[i] == 0) continue; sumw += wg[i]; d1 = x[i] - mean; mean += d1 * (wg[i] / sumw); M2 += wg[i] * d1 * (x[i] - mean); ++n; if(min > x[i]) min = x[i]; if(max < x[i]) max = x[i]; } M2 = sqrt(M2/(sumw-1)); } else mean = M2 = min = max = NA_REAL; } if(std::isnan(M2)) M2 = NA_REAL; NumericVector result = NumericVector::create(n,mean,M2,min,max); // NumericVector::create(n,(double)mean,(double)M2,min,max); if(setn) { Rf_namesgets(result, CharacterVector::create("N","Mean","SD","Min","Max")); Rf_classgets(result, CharacterVector::create("qsu","table")); } return result; } else { // with groups if(g.size() != l) stop("length(g) must match nrow(X)"); // long double d1 = 0; double d1 = 0; int k = 0; NumericMatrix result(ng, 5); // = no_init_matrix initializing is better -> valgrind NumericMatrix::Column n = result( _ , 0); NumericMatrix::Column mean = result( _ , 1); NumericMatrix::Column M2 = result( _ , 2); NumericMatrix::Column min = result( _ , 3); NumericMatrix::Column max = result( _ , 4); std::fill(M2.begin(), M2.end(), NA_REAL); if(Rf_isNull(w)) { // No weights if(stable_algo) { for(int i = l; i--; ) { if(std::isnan(x[i])) continue; k = g[i]-1; if(std::isnan(M2[k])) { mean[k] = min[k] = max[k] = x[i]; M2[k] = 0.0; n[k] = 1.0; } else { d1 = x[i]-mean[k]; mean[k] += d1 * (1 / ++n[k]); M2[k] += d1*(x[i]-mean[k]); if(min[k] > x[i]) min[k] = x[i]; if(max[k] < x[i]) max[k] = x[i]; } } for(int i = ng; i--; ) if(!std::isnan(M2[i])) M2[i] = sqrt(M2[i]/(n[i]-1)); } else { for(int i = l; i--; ) { if(std::isnan(x[i])) continue; k = g[i]-1; if(std::isnan(M2[k])) { mean[k] = min[k] = max[k] = x[i]; M2[k] = pow(x[i],2); n[k] = 1.0; } else { mean[k] += x[i]; M2[k] += pow(x[i],2); if(min[k] > x[i]) min[k] = x[i]; if(max[k] < x[i]) max[k] = x[i]; ++n[k]; } } for(int i = ng; i--; ) { if(std::isnan(M2[i])) continue; mean[i] /= n[i]; M2[i] = sqrt((M2[i] - pow(mean[i],2)*n[i])/(n[i]-1)); if(std::isnan(M2[i])) M2[i] = NA_REAL; } } } else { // with weights NumericVector wg = w; if(l != wg.size()) stop("length(w) must match length(x)"); NumericVector sumw(ng); // = no_init_vector(ng); // better for valgrind for(int i = l; i--; ) { if(std::isnan(x[i]) || std::isnan(wg[i]) || wg[i] == 0) continue; k = g[i]-1; if(std::isnan(M2[k])) { sumw[k] = wg[i]; mean[k] = min[k] = max[k] = x[i]; M2[k] = 0.0; n[k] = 1.0; } else { sumw[k] += wg[i]; d1 = x[i] - mean[k]; mean[k] += d1 * (wg[i] / sumw[k]); M2[k] += wg[i] * d1 * (x[i] - mean[k]); ++n[k]; if(min[k] > x[i]) min[k] = x[i]; if(max[k] < x[i]) max[k] = x[i]; } } for(int i = ng; i--; ) if(!std::isnan(M2[i])) M2[i] = sqrt(M2[i]/(sumw[i]-1)); } if(setn) { Rf_dimnamesgets(result, List::create(gn, CharacterVector::create("N","Mean","SD","Min","Max"))); Rf_classgets(result, CharacterVector::create("qsu","matrix","table")); } return result; } } else { if(ng == 0) { // No groups int j = l-1; // double n = 0, min = R_PosInf, max = R_NegInf; // long double mean = 0, d1 = 0, dn = 0, dn2 = 0, term1 = 0, M2 = 0, M3 = 0, M4 = 0; double n = 0, min = R_PosInf, max = R_NegInf, mean = 0, d1 = 0, dn = 0, dn2 = 0, term1 = 0, M2 = 0, M3 = 0, M4 = 0; if(Rf_isNull(w)) { // No weights while(std::isnan(x[j]) && j!=0) --j; if(j != 0) { // if(j == 0) stop("Not enough non-mising obs."); for(int i = j+1; i--; ) { if(std::isnan(x[i])) continue; d1 = x[i]-mean; dn = d1 * (1 / ++n); mean += dn; dn2 = dn * dn; term1 = d1 * dn * (n-1); M4 += term1*dn2*(n*n - 3*n + 3) + 6*dn2*M2 - 4*dn*M3; M3 += term1*dn*(n - 2) - 3*dn*M2; M2 += term1; if(min > x[i]) min = x[i]; if(max < x[i]) max = x[i]; } M4 = (n*M4)/(M2*M2); // kurtosis // Excess kurtosis: - 3; M3 = (sqrt(n)*M3) / sqrt(pow(M2,3)); // Skewness M2 = sqrt(M2/(n-1)); // Standard Deviation } else mean = M2 = M3 = M4 = min = max = NA_REAL; } else { // with weights NumericVector wg = w; if(l != wg.size()) stop("length(w) must match length(x)"); // long double sumw = 0; double sumw = 0; while((std::isnan(x[j]) || std::isnan(wg[j]) || wg[j] == 0) && j!=0) --j; if(j != 0) { // if(j == 0) stop("Not enough non-mising obs."); for(int i = j+1; i--; ) { if(std::isnan(x[i]) || std::isnan(wg[i]) || wg[i] == 0) continue; sumw += wg[i]; // great, this is correct: d1 = x[i]-mean; dn = d1 * (wg[i] / sumw); mean += dn; dn2 = dn * dn; term1 = wg[i] * d1 * dn * (sumw-wg[i]); M4 += term1*dn2*(sumw*sumw - 3*sumw + 3) + 6*dn2*M2 - 4*dn*M3; M3 += term1*dn*(sumw - 2) - 3*dn*M2; M2 += term1; ++n; if(min > x[i]) min = x[i]; if(max < x[i]) max = x[i]; } M4 = (sumw*M4)/(M2*M2); // kurtosis // Excess kurtosis: - 3; M3 = (sqrt(sumw)*M3) / sqrt(pow(M2,3)); // Skewness M2 = sqrt(M2/(sumw-1)); // Standard Deviation } else mean = M2 = M3 = M4 = min = max = NA_REAL; } NumericVector result = NumericVector::create(n,mean,M2,min,max,M3,M4); // NumericVector::create(n,(double)mean,(double)M2,min,max,(double)M3,(double)M4); if(setn) { Rf_namesgets(result, CharacterVector::create("N","Mean","SD","Min","Max","Skew","Kurt")); Rf_classgets(result, CharacterVector::create("qsu","table")); } return result; } else { // with groups if(g.size() != l) stop("length(g) must match nrow(X)"); double d1 = 0, dn = 0, dn2 = 0, term1 = 0; int k = 0; NumericMatrix result(ng, 7); // = no_init_matrix // Initializing better -> valgrind NumericMatrix::Column n = result( _ , 0); NumericMatrix::Column mean = result( _ , 1); NumericMatrix::Column M2 = result( _ , 2); NumericMatrix::Column min = result( _ , 3); NumericMatrix::Column max = result( _ , 4); NumericMatrix::Column M3 = result( _ , 5); NumericMatrix::Column M4 = result( _ , 6); std::fill(M2.begin(), M2.end(), NA_REAL); if(Rf_isNull(w)) { // No weights for(int i = l; i--; ) { if(std::isnan(x[i])) continue; k = g[i]-1; if(std::isnan(M2[k])) { mean[k] = min[k] = max[k] = x[i]; M2[k] = M3[k] = M4[k] = 0.0; n[k] = 1.0; } else { d1 = x[i]-mean[k]; dn = d1 * (1 / ++n[k]); mean[k] += dn; dn2 = dn * dn; term1 = d1 * dn * (n[k]-1); M4[k] += term1*dn2*(n[k]*n[k] - 3*n[k] + 3) + 6*dn2*M2[k] - 4*dn*M3[k]; M3[k] += term1*dn*(n[k] - 2) - 3*dn*M2[k]; M2[k] += term1; if(min[k] > x[i]) min[k] = x[i]; if(max[k] < x[i]) max[k] = x[i]; } } for(int i = ng; i--; ) { M4[i] = (n[i]*M4[i])/(M2[i]*M2[i]); // kurtosis // Excess kurtosis: - 3; M3[i] = (sqrt(n[i])*M3[i]) / sqrt(pow(M2[i],3)); // Skewness M2[i] = sqrt(M2[i]/(n[i]-1)); // Standard Deviation } } else { // with weights NumericVector wg = w; if(l != wg.size()) stop("length(w) must match length(x)"); NumericVector sumw(ng); // = no_init_vector(ng); // better for valgrind for(int i = l; i--; ) { if(std::isnan(x[i]) || std::isnan(wg[i]) || wg[i] == 0) continue; k = g[i]-1; if(std::isnan(M2[k])) { sumw[k] = wg[i]; mean[k] = min[k] = max[k] = x[i]; M2[k] = M3[k] = M4[k] = 0.0; n[k] = 1.0; } else { sumw[k] += wg[i]; d1 = x[i]-mean[k]; dn = d1 * (wg[i] / sumw[k]); mean[k] += dn; dn2 = dn * dn; term1 = wg[i] * d1 * dn * (sumw[k]-wg[i]); M4[k] += term1*dn2*(sumw[k]*sumw[k] - 3*sumw[k] + 3) + 6*dn2*M2[k] - 4*dn*M3[k]; M3[k] += term1*dn*(sumw[k] - 2) - 3*dn*M2[k]; M2[k] += term1; ++n[k]; if(min[k] > x[i]) min[k] = x[i]; if(max[k] < x[i]) max[k] = x[i]; } } for(int i = ng; i--; ) { M4[i] = (sumw[i]*M4[i])/(M2[i]*M2[i]); // kurtosis // Excess kurtosis: - 3; M3[i] = (sqrt(sumw[i])*M3[i]) / sqrt(pow(M2[i],3)); // Skewness M2[i] = sqrt(M2[i]/(sumw[i]-1)); // Standard Deviation } } if(setn) { Rf_dimnamesgets(result, List::create(gn, CharacterVector::create("N","Mean","SD","Min","Max","Skew","Kurt"))); Rf_classgets(result, CharacterVector::create("qsu","matrix","table")); } return result; } } // } else { // detailed summary: fully sorting. Note: This doesn't work grouped, groups must also be sorted -> need to sort within each group or compute ordering // NumericVector y = no_init_vector(l); // auto pend = std::remove_copy_if(x.begin(), x.end(), y.begin(), isnan2); // l = pend - x.begin(); // middle = sz/2-1; // std::sort(y.begin(), pend); // good ?? // // if(dets == 1 && det[0] == 1) det = 5; // } } inline NumericVector replaceC1(NumericMatrix x, NumericVector y, bool div = false) { if(div) { NumericMatrix::Column C1 = x(_, 0); // best ? C1 = C1 / y; } else { x(_, 0) = y; // best way ? use NumericMatrix::Column ? } return x; } // [[Rcpp::export]] SEXP fbstatsCpp(const NumericVector& x, bool ext = false, int ng = 0, const IntegerVector& g = 0, int npg = 0, const IntegerVector& pg = 0, const SEXP& w = R_NilValue, bool stable_algo = true, bool array = true, bool setn = true, const SEXP& gn = R_NilValue) { int l = x.size(), d = (ext) ? 7 : 5; if(npg == 0) { // No panel if(ng == 0) { // No groups return(fbstatstemp(x, ext, 0, 0, w, setn, stable_algo, gn)); } else { return(fbstatstemp(x, ext, ng, g, w, setn, stable_algo, gn)); } } else { if(pg.size() != l) stop("length(pid) must match nrow(X)"); bool weights = !Rf_isNull(w); NumericVector sum(npg, NA_REAL); NumericVector sumw((weights) ? npg : 1); // no_init_vector(npg) : no_init_vector(1); // better for valgrind double osum = 0; if(!weights) { IntegerVector n(npg, 1); for(int i = l; i--; ) { if(!std::isnan(x[i])) { if(std::isnan(sum[pg[i]-1])) sum[pg[i]-1] = x[i]; else { sum[pg[i]-1] += x[i]; ++n[pg[i]-1]; } } } int on = 0; for(int i = npg; i--; ) { // Problem: if one sum remained NA, osum becomes NA (also issue with B and W and TRA) if(std::isnan(sum[i])) continue; // solves the issue osum += sum[i]; on += n[i]; sum[i] /= n[i]; } osum = osum/on; } else { NumericVector wg = w; if(l != wg.size()) stop("length(w) must match length(x)"); // Note: Skipping zero weights is not really necessary here, but it might be numerically better and also faster if there are many. for(int i = l; i--; ) { if(std::isnan(x[i]) || std::isnan(wg[i]) || wg[i] == 0) continue; if(std::isnan(sum[pg[i]-1])) { sum[pg[i]-1] = x[i]*wg[i]; sumw[pg[i]-1] = wg[i]; } else { sum[pg[i]-1] += x[i]*wg[i]; sumw[pg[i]-1] += wg[i]; } } double osumw = 0; for(int i = npg; i--; ) { if(std::isnan(sum[i]) || sumw[i] == 0) continue; // solves the issue osum += sum[i]; osumw += sumw[i]; sum[i] /= sumw[i]; } osum = osum/osumw; // for(int i = npg; i--; ) sumw[i] /= osumw; } NumericVector within = no_init_vector(l); if(ng == 0) { // No groups for(int i = 0; i != l; ++i) within[i] = x[i] - sum[pg[i]-1] + osum; // if-check for NA's is not faster NumericMatrix result = no_init_matrix(3, d); result(0, _) = fbstatstemp(x, ext, 0, 0, w, false, stable_algo); result(1, _) = (weights) ? fbstatstemp(sum, ext, 0, 0, sumw, false, stable_algo) : fbstatstemp(sum, ext, 0, 0, w, false, stable_algo); result(2, _) = fbstatstemp(within, ext, 0, 0, w, false, stable_algo); result[2] /= result[1]; if(setn) { Rf_dimnamesgets(result, List::create(CharacterVector::create("Overall","Between","Within"), (ext) ? CharacterVector::create("N/T","Mean","SD","Min","Max","Skew","Kurt") : CharacterVector::create("N/T","Mean","SD","Min","Max"))); Rf_classgets(result, CharacterVector::create("qsu","matrix","table")); } return(result); } else { if(g.size() != l) stop("length(g) must match nrow(X)"); NumericVector between = no_init_vector(l); // bool groupids[ng][npg]; // could do +1 trick, but that could be costly in term of memory if few g and many pg LogicalMatrix groupids = no_init_matrix(ng, npg); // memset(groupids, true, sizeof(bool)*ng*npg); // works ? necessary ? std::fill(groupids.begin(), groupids.end(), true); NumericVector gnpids(ng); // best ? for(int i = 0; i != l; ++i) { if(std::isnan(x[i])) { // important ? right ? between[i] = within[i] = NA_REAL; // x[i] ? } else { if(groupids(g[i]-1, pg[i]-1)) { // added this part ++gnpids[g[i]-1]; groupids(g[i]-1, pg[i]-1) = false; } between[i] = sum[pg[i]-1]; within[i] = x[i] - between[i] + osum; } } if(array) { NumericMatrix result = no_init_matrix(d*ng, 3); result(_,0) = fbstatstemp(x, ext, ng, g, w, false, stable_algo); result(_,1) = replaceC1(as(fbstatstemp(between, ext, ng, g, w, false, stable_algo)), gnpids); // how to do this ? -> above best approach ? result(_,2) = replaceC1(as(fbstatstemp(within, ext, ng, g, w, false, stable_algo)), gnpids, true); if(setn) { Rf_dimgets(result, Dimension(ng, d, 3)); Rf_dimnamesgets(result, List::create(gn, (ext) ? CharacterVector::create("N/T","Mean","SD","Min","Max","Skew","Kurt") : CharacterVector::create("N/T","Mean","SD","Min","Max"), CharacterVector::create("Overall","Between","Within"))); Rf_classgets(result, CharacterVector::create("qsu","array","table")); } return(result); } else { List result(3); // option array ? result[0] = fbstatstemp(x, ext, ng, g, w, true, stable_algo, gn); result[1] = replaceC1(as(fbstatstemp(between, ext, ng, g, w, true, stable_algo, gn)), gnpids); // how to do this ? -> above best approach ? result[2] = replaceC1(as(fbstatstemp(within, ext, ng, g, w, true, stable_algo, gn)), gnpids, true); Rf_namesgets(result, CharacterVector::create("Overall","Between","Within")); return(result); } } } } // [[Rcpp::export]] SEXP fbstatsmCpp(const NumericMatrix& x, bool ext = false, int ng = 0, const IntegerVector& g = 0, int npg = 0, const IntegerVector& pg = 0, const SEXP& w = R_NilValue, bool stable_algo = true, bool array = true, const SEXP& gn = R_NilValue) { int col = x.ncol(), d = (ext) ? 7 : 5; // l = x.nrow(), if(npg == 0) { // No panel if(ng == 0) { // No groups NumericMatrix out = no_init_matrix(col, d); for(int j = col; j--; ) out(j, _) = fbstatstemp(x(_, j), ext, 0, 0, w, false, stable_algo); Rf_dimnamesgets(out, List::create(colnames(x), (ext) ? CharacterVector::create("N","Mean","SD","Min","Max","Skew","Kurt") : CharacterVector::create("N","Mean","SD","Min","Max"))); Rf_classgets(out, CharacterVector::create("qsu","matrix","table")); return out; } else { // if(g.size() != l) stop("length(g) must match nrow(X)"); // checked in fbstatstemp if(array) { NumericMatrix out = no_init_matrix(d*ng, col); for(int j = col; j--; ) out(_, j) = fbstatstemp(x(_, j), ext, ng, g, w, false, stable_algo); Rf_dimgets(out, Dimension(ng, d, col)); Rf_dimnamesgets(out, List::create(gn, (ext) ? CharacterVector::create("N","Mean","SD","Min","Max","Skew","Kurt") : CharacterVector::create("N","Mean","SD","Min","Max"), colnames(x))); Rf_classgets(out, CharacterVector::create("qsu","array","table")); return out; } else { List out(col); for(int j = col; j--; ) out[j] = fbstatstemp(x(_, j), ext, ng, g, w, true, stable_algo, gn); Rf_setAttrib(out, R_NamesSymbol, colnames(x)); return out; } } } else { if(ng == 0) { if(array) { NumericMatrix out = no_init_matrix(d*3, col); for(int j = col; j--; ) out(_, j) = as(fbstatsCpp(x(_, j), ext, 0, 0, npg, pg, w, stable_algo, true, false)); // or Rf_coerce ? Rf_dimgets(out, Dimension(3, d, col)); Rf_dimnamesgets(out, List::create(CharacterVector::create("Overall","Between","Within"), (ext) ? CharacterVector::create("N/T","Mean","SD","Min","Max","Skew","Kurt") : CharacterVector::create("N/T","Mean","SD","Min","Max"), colnames(x))); Rf_classgets(out, CharacterVector::create("qsu","array","table")); return out; } else { List out(col); for(int j = col; j--; ) out[j] = fbstatsCpp(x(_, j), ext, 0, 0, npg, pg, w, stable_algo, false, true, gn); Rf_setAttrib(out, R_NamesSymbol, colnames(x)); return out; } } else { if(array) { NumericMatrix out = no_init_matrix(d*3*ng, col); for(int j = col; j--; ) out(_, j) = as(fbstatsCpp(x(_, j), ext, ng, g, npg, pg, w, stable_algo, true, false)); // or Rf_coerce ? Rf_dimgets(out, IntegerVector::create(ng, d, 3, col)); Rf_dimnamesgets(out, List::create(gn, (ext) ? CharacterVector::create("N/T","Mean","SD","Min","Max","Skew","Kurt") : CharacterVector::create("N/T","Mean","SD","Min","Max"), CharacterVector::create("Overall","Between","Within"), colnames(x))); Rf_classgets(out, CharacterVector::create("qsu","array","table")); return out; } else { List out(col); for(int j = col; j--; ) out[j] = fbstatsCpp(x(_, j), ext, ng, g, npg, pg, w, stable_algo, false, true, gn); Rf_setAttrib(out, R_NamesSymbol, colnames(x)); return out; } } } } template NumericVector fnobs5Impl(Vector x, bool ext = false, int ng = 0, IntegerVector g = 0, bool real = false, bool setn = false, SEXP gn = R_NilValue) { int l = x.size(), d = (ext) ? 7 : 5; if(ng == 0) { int n = 0; if(real) { for(int i = 0; i != l; ++i) if(x[i] == x[i]) ++n; // This loop is faster } else { for(int i = 0; i != l; ++i) if(x[i] != Vector::get_na()) ++n; } NumericVector out(d, NA_REAL); if(setn) { Rf_namesgets(out, (ext) ? CharacterVector::create("N","Mean","SD","Min","Max","Skew","Kurt") : CharacterVector::create("N","Mean","SD","Min","Max")); Rf_classgets(out, CharacterVector::create("qsu","table")); } out[0] = (double)n; return out; } else { // with groups if(g.size() != l) stop("length(g) must match nrow(X)"); NumericMatrix out = no_init_matrix(ng, d); std::fill_n(out.begin(), ng, 0.0); // works ?? -> yes std::fill(out.begin()+ng, out.end(), NA_REAL); NumericMatrix::Column n = out(_, 0); if(real) { for(int i = 0; i != l; ++i) if(x[i] == x[i]) ++n[g[i]-1]; } else { for(int i = 0; i != l; ++i) if(x[i] != Vector::get_na()) ++n[g[i]-1]; } if(setn) { Rf_dimnamesgets(out, List::create(gn, (ext) ? CharacterVector::create("N","Mean","SD","Min","Max","Skew","Kurt") : CharacterVector::create("N","Mean","SD","Min","Max"))); Rf_classgets(out, CharacterVector::create("qsu","matrix","table")); } return out; } } template NumericMatrix fnobs5pImpl(Vector x, bool ext = false, int ng = 0, IntegerVector g = 0, int npg = 0, IntegerVector pg = 0, bool real = false, bool array = true, SEXP gn = R_NilValue) { int l = x.size(), d = (ext) ? 7 : 5; if(pg.size() != l) stop("length(pid) must match nrow(X)"); if(ng == 0) { int n = 0, npgc = 0; // bool npgs[npg+1]; // memset(npgs, true, sizeof(bool)*(npg+1)); std::vector npgs(npg+1, true); if(real) { for(int i = 0; i != l; ++i) { if(x[i] == x[i]) ++n; if(npgs[pg[i]-1]) { ++npgc; npgs[pg[i]-1] = false; } } } else { for(int i = 0; i != l; ++i) { if(x[i] != Vector::get_na()) ++n; if(npgs[pg[i]-1]) { ++npgc; npgs[pg[i]-1] = false; } } } NumericMatrix out = no_init_matrix(3, d); out[0] = (double)n; out[1] = (double)npgc; out[2] = out[0]/out[1]; std::fill(out.begin()+3, out.end(), NA_REAL); if(!array) { Rf_dimnamesgets(out, List::create(CharacterVector::create("Overall","Between","Within"), (ext) ? CharacterVector::create("N/T","Mean","SD","Min","Max","Skew","Kurt") : CharacterVector::create("N/T","Mean","SD","Min","Max"))); Rf_classgets(out, CharacterVector::create("qsu","matrix","table")); } return out; } else { // with groups if(g.size() != l) stop("length(g) must match nrow(X)"); NumericMatrix out = no_init_matrix(ng*d, 3); std::fill_n(out.begin(), ng, 0.0); // works ? -> yes std::fill(out.begin()+ng, out.end(), NA_REAL); NumericMatrix::Column n = out(_, 0); NumericMatrix::Column gnpids = out(_, 1); std::fill_n(gnpids.begin(), ng, 0.0); // bool groupids[ng][npg]; // could do +1 trick, but that could be costly in term of memory, if few g and many pg // memset(groupids, true, sizeof(bool)*ng*npg); LogicalMatrix groupids = no_init_matrix(ng, npg); std::fill(groupids.begin(), groupids.end(), true); if(real) { for(int i = 0; i != l; ++i) { if(x[i] == x[i]) { ++n[g[i]-1]; if(groupids(g[i]-1, pg[i]-1)) { ++gnpids[g[i]-1]; groupids(g[i]-1, pg[i]-1) = false; } } } } else { for(int i = 0; i != l; ++i) { if(x[i] != Vector::get_na()) { ++n[g[i]-1]; if(groupids(g[i]-1, pg[i]-1)) { ++gnpids[g[i]-1]; groupids(g[i]-1, pg[i]-1) = false; } } } } NumericMatrix::Column nt = out(_, 2); for(int i = 0; i != ng; ++i) nt[i] = n[i] / gnpids[i]; if(!array) { Rf_dimgets(out, Dimension(ng, d, 3)); Rf_dimnamesgets(out, List::create(gn, (ext) ? CharacterVector::create("N/T","Mean","SD","Min","Max","Skew","Kurt") : CharacterVector::create("N/T","Mean","SD","Min","Max"), CharacterVector::create("Overall","Between","Within"))); Rf_classgets(out, CharacterVector::create("qsu","array","table")); } return out; } } // [[Rcpp::export]] SEXP fbstatslCpp(const List& x, bool ext = false, int ng = 0, const IntegerVector& g = 0, int npg = 0, const IntegerVector& pg = 0, const SEXP& w = R_NilValue, bool stable_algo = true, bool array = true, const SEXP& gn = R_NilValue) { int col = x.size(), d = (ext) ? 7 : 5; if(npg == 0) { // No panel if(ng == 0) { // No groups NumericMatrix out = no_init_matrix(col, d); for(int j = col; j--; ) { switch(TYPEOF(x[j])) { case REALSXP:{ NumericVector column = x[j]; if(Rf_isObject(column)) out(j, _) = fnobs5Impl(column, ext, 0, 0, true); else out(j, _) = fbstatstemp(column, ext, 0, 0, w, false, stable_algo); break; } case INTSXP: { IntegerVector column = x[j]; if(Rf_isObject(column)) out(j, _) = fnobs5Impl(column, ext); else out(j, _) = fbstatstemp(x[j], ext, 0, 0, w, false, stable_algo); break; } case STRSXP: out(j, _) = fnobs5Impl(x[j], ext); break; case LGLSXP: out(j, _) = fnobs5Impl(x[j], ext); break; default: stop("Not supported SEXP type!"); } } Rf_dimnamesgets(out, List::create(Rf_getAttrib(x, R_NamesSymbol), (ext) ? CharacterVector::create("N","Mean","SD","Min","Max","Skew","Kurt") : CharacterVector::create("N","Mean","SD","Min","Max"))); Rf_classgets(out, CharacterVector::create("qsu","matrix","table")); return out; } else { if(array) { NumericMatrix out = no_init_matrix(d*ng, col); for(int j = col; j--; ) { switch(TYPEOF(x[j])) { case REALSXP:{ NumericVector column = x[j]; if(Rf_isObject(column)) out(_, j) = fnobs5Impl(column, ext, ng, g, true); else out(_, j) = fbstatstemp(column, ext, ng, g, w, false, stable_algo); break; } case INTSXP: { IntegerVector column = x[j]; if(Rf_isObject(column)) out(_, j) = fnobs5Impl(column, ext, ng, g); else out(_, j) = fbstatstemp(x[j], ext, ng, g, w, false, stable_algo); break; } case STRSXP: out(_, j) = fnobs5Impl(x[j], ext, ng, g); break; case LGLSXP: out(_, j) = fnobs5Impl(x[j], ext, ng, g); break; default: stop("Not supported SEXP type!"); } } Rf_dimgets(out, Dimension(ng, d, col)); Rf_dimnamesgets(out, List::create(gn, (ext) ? CharacterVector::create("N","Mean","SD","Min","Max","Skew","Kurt") : CharacterVector::create("N","Mean","SD","Min","Max"), Rf_getAttrib(x, R_NamesSymbol))); Rf_classgets(out, CharacterVector::create("qsu","array","table")); return out; } else { List out(col); for(int j = col; j--; ) { switch(TYPEOF(x[j])) { case REALSXP:{ NumericVector column = x[j]; if(Rf_isObject(column)) out[j] = fnobs5Impl(column, ext, ng, g, true, true, gn); else out[j] = fbstatstemp(column, ext, ng, g, w, true, stable_algo, gn); break; } case INTSXP: { IntegerVector column = x[j]; if(Rf_isObject(column)) out[j] = fnobs5Impl(column, ext, ng, g, false, true, gn); else out[j] = fbstatstemp(x[j], ext, ng, g, w, true, stable_algo, gn); break; } case STRSXP: out[j] = fnobs5Impl(x[j], ext, ng, g, false, true, gn); break; case LGLSXP: out[j] = fnobs5Impl(x[j], ext, ng, g, false, true, gn); break; default: stop("Not supported SEXP type!"); } } Rf_setAttrib(out, R_NamesSymbol, Rf_getAttrib(x, R_NamesSymbol)); return out; } } } else { // with panel if(ng == 0) { if(array) { NumericMatrix out = no_init_matrix(d*3, col); for(int j = col; j--; ) { switch(TYPEOF(x[j])) { case REALSXP:{ NumericVector column = x[j]; if(Rf_isObject(column)) out(_, j) = fnobs5pImpl(column, ext, 0, 0, npg, pg, true); else out(_, j) = as(fbstatsCpp(column, ext, 0, 0, npg, pg, w, stable_algo, true, false)); // or Rf_coerce ? break; } case INTSXP: { IntegerVector column = x[j]; if(Rf_isObject(column)) out(_, j) = fnobs5pImpl(column, ext, 0, 0, npg, pg); else out(_, j) = as(fbstatsCpp(x[j], ext, 0, 0, npg, pg, w, stable_algo, true, false)); break; } case STRSXP: out(_, j) = fnobs5pImpl(x[j], ext, 0, 0, npg, pg); break; case LGLSXP: out(_, j) = fnobs5pImpl(x[j], ext, 0, 0, npg, pg); break; default: stop("Not supported SEXP type!"); } } Rf_dimgets(out, Dimension(3, d, col)); Rf_dimnamesgets(out, List::create(CharacterVector::create("Overall","Between","Within"), (ext) ? CharacterVector::create("N/T","Mean","SD","Min","Max","Skew","Kurt") : CharacterVector::create("N/T","Mean","SD","Min","Max"), Rf_getAttrib(x, R_NamesSymbol))); Rf_classgets(out, CharacterVector::create("qsu","array","table")); return out; } else { List out(col); for(int j = col; j--; ) { switch(TYPEOF(x[j])) { case REALSXP:{ NumericVector column = x[j]; if(Rf_isObject(column)) out[j] = fnobs5pImpl(column, ext, 0, 0, npg, pg, true, false, gn); else out[j] = fbstatsCpp(column, ext, 0, 0, npg, pg, w, stable_algo, false, true, gn); break; } case INTSXP: { IntegerVector column = x[j]; if(Rf_isObject(column)) out[j] = fnobs5pImpl(column, ext, 0, 0, npg, pg, false, false, gn); else out[j] = fbstatsCpp(x[j], ext, 0, 0, npg, pg, w, stable_algo, false, true, gn); break; } case STRSXP: out[j] = fnobs5pImpl(x[j], ext, 0, 0, npg, pg, false, false, gn); break; case LGLSXP: out[j] = fnobs5pImpl(x[j], ext, 0, 0, npg, pg, false, false, gn); break; default: stop("Not supported SEXP type!"); } } Rf_setAttrib(out, R_NamesSymbol, Rf_getAttrib(x, R_NamesSymbol)); return out; } } else { if(array) { NumericMatrix out = no_init_matrix(d*3*ng, col); for(int j = col; j--; ) { switch(TYPEOF(x[j])) { case REALSXP:{ NumericVector column = x[j]; if(Rf_isObject(column)) out(_, j) = fnobs5pImpl(column, ext, ng, g, npg, pg, true); else out(_, j) = as(fbstatsCpp(column, ext, ng, g, npg, pg, w, stable_algo, true, false)); // or Rf_coerce ? break; } case INTSXP: { IntegerVector column = x[j]; if(Rf_isObject(column)) out(_, j) = fnobs5pImpl(column, ext, ng, g, npg, pg); else out(_, j) = as(fbstatsCpp(x[j], ext, ng, g, npg, pg, w, stable_algo, true, false)); break; } case STRSXP: out(_, j) = fnobs5pImpl(x[j], ext, ng, g, npg, pg); break; case LGLSXP: out(_, j) = fnobs5pImpl(x[j], ext, ng, g, npg, pg); break; default: stop("Not supported SEXP type!"); } } Rf_dimgets(out, IntegerVector::create(ng, d, 3, col)); Rf_dimnamesgets(out, List::create(gn, (ext) ? CharacterVector::create("N/T","Mean","SD","Min","Max","Skew","Kurt") : CharacterVector::create("N/T","Mean","SD","Min","Max"), CharacterVector::create("Overall","Between","Within"), Rf_getAttrib(x, R_NamesSymbol))); Rf_classgets(out, CharacterVector::create("qsu","array","table")); return out; } else { List out(col); for(int j = col; j--; ) { switch(TYPEOF(x[j])) { case REALSXP:{ NumericVector column = x[j]; if(Rf_isObject(column)) out[j] = fnobs5pImpl(column, ext, ng, g, npg, pg, true, false, gn); else out[j] = fbstatsCpp(column, ext, ng, g, npg, pg, w, stable_algo, false, true, gn); break; } case INTSXP: { IntegerVector column = x[j]; if(Rf_isObject(column)) out[j] = fnobs5pImpl(column, ext, ng, g, npg, pg, false, false, gn); else out[j] = fbstatsCpp(x[j], ext, ng, g, npg, pg, w, stable_algo, false, true, gn); break; } case STRSXP: out[j] = fnobs5pImpl(x[j], ext, ng, g, npg, pg, false, false, gn); break; case LGLSXP: out[j] = fnobs5pImpl(x[j], ext, ng, g, npg, pg, false, false, gn); break; default: stop("Not supported SEXP type!"); } } Rf_setAttrib(out, R_NamesSymbol, Rf_getAttrib(x, R_NamesSymbol)); return out; } } } } // Old / Experimental: // // template <> // NumericVector fnobs5Impl(Vector x, int ng, IntegerVector g, bool real, bool setn) { // stop("Not supported SEXP type!"); // } // // template <> // NumericVector fnobs5Impl(Vector x, int ng, IntegerVector g, bool real, bool setn) { // stop("Not supported SEXP type!"); // } // // template <> // NumericVector fnobs5Impl(Vector x, int ng, IntegerVector g, bool real, bool setn) { // stop("Not supported SEXP type!"); // } // // template <> // NumericVector fnobs5Impl(Vector x, int ng, IntegerVector g, bool real, bool setn) { // stop("Not supported SEXP type!"); // } // // // [[Rcpp::export]] // NumericVector fnobs5Cpp(SEXP x, int ng = 0, IntegerVector g = 0, bool real = false, bool setn = true){ // RCPP_RETURN_VECTOR(fnobs5Impl, x, ng, g, real, setn); // } // // [[Rcpp::export]] // SEXP fbstatsCpp(NumericVector x, int ng = 0, IntegerVector g = 0, IntegerVector gs = 0, // int npg = 0, IntegerVector pg = 0, IntegerVector pgs = 0, // SEXP w, // bool narm = true) { // int l = x.size(); // if(ng == 0 && npg == 0) { // No groups, no panel !! // int n = 0; // double min = 0, max = 0, sum = 0, sq_sum = 0; // if(narm) { // int j = l-1; // while(std::isnan(x[j]) && j!=0) --j; // min = x[j]; max = x[j]; sum = x[j]; sq_sum = x[j]; // if(j != 0) for(int i = j; i--; ) { // if(std::isnan(x[i])) continue; // sum += x[i]; // sq_sum += x[i] * x[i]; // if(min>x[i]) min = x[i]; // if(maxx[i]) min = x[i]; // if(max Not Bad at all // if(std::isnan(sum[k])) { // sum[k] = x[i]; // sq_sum[k] = x[i]*x[i]; // min[k] = x[i]; // max[k] = x[i]; // n[k] = 1; // } else { // integer for subsetting ?? // sum[k] += x[i]; // sq_sum[k] += x[i]*x[i]; // if(min[k] > x[i]) min[k] = x[i]; // if(max[k] < x[i]) max[k] = x[i]; // ++n[k]; // } // } // } // sum = sum / n; // sq_sum = sqrt((sq_sum - (sum*sum)*n)/(n-1)); // return result; // } else if (ng == 0) { // // .... // } // return R_NilValue; // } // // // [[Rcpp::export]] // SEXP test(NumericVector x) { // int l = x.size(); // int j = l-1; // while(std::isnan(x[j]) && j!=0) --j; // right -- before ?? // return NumericVector::create(j); // } // // // [[Rcpp::plugins(cpp11)]] // #include // #include // using namespace Rcpp; // // // [[Rcpp::export]] // NumericVector fbstats(NumericVector x, bool narm = false) { // possibly try quick conversion to factor?? // int l = x.size(); // //NumericVector un = unique(x); // fastest for now. see how constructed.. // //std::sort(x.begin(), x.end()); // //std::unordered_set newvalue; // //std::unordered_map counts; // Also too slow!! // // https://stackoverflow.com/questions/23150905/effective-unique-on-unordered-elements // //std::vector set(1000000000); // simple: just put true if already occurred -> Needs to be positive integers!! // //int un = 0; // //NumericVector y = x * 100000; // double min = x[0]; // what about NA_RM of the first element in NA?? // double max = x[0]; // double sum = 0; // double sq_sum = 0; // //double c_sum = 0; // //double f_sum = 0; // if(narm) { // int n = 0; // for(int i = l; i--; ) { // if(ISNAN(x[i])) continue; // sum += x[i]; // sq_sum += x[i] * x[i]; // //c_sum += sq_sum * x[i]; // //f_sum += c_sum * x[i]; // if(min>x[i]) min = x[i]; // if(maxx[i]) min = x[i]; // if(max #define USE_RINTERNALS #include #include // for uint64_t rather than unsigned long long #include // #include "types.h" // data.table depends on R>=3.0.0 when R_xlen_t was introduced // Before R 3.0.0, RLEN used to be switched to R_len_t as R_xlen_t wasn't available. // We could now replace all RLEN with R_xlen_t directly. Or keep RLEN for the shorter // name so as not to have to check closely one letter difference R_xlen_t/R_len_t. We // might also undefine R_len_t to ensure not to use it. typedef R_xlen_t RLEN; // #define PRId64 "lld" // needed in rbindlist CHECK_RANGE macro -> disabled right now.. #define IS_ASCII(x) (LEVELS(x) & 64) #define IS_TRUE(x) (TYPEOF(x)==LGLSXP && LENGTH(x)==1 && LOGICAL(x)[0]==TRUE) #define IS_FALSE(x) (TYPEOF(x)==LGLSXP && LENGTH(x)==1 && LOGICAL(x)[0]==FALSE) #define IS_TRUE_OR_FALSE(x) (TYPEOF(x)==LGLSXP && LENGTH(x)==1 && LOGICAL(x)[0]!=NA_LOGICAL) #define SIZEOF(x) sizes[TYPEOF(x)] #define TYPEORDER(x) typeorder[x] // for use with bit64::integer64 #define NA_INTEGER64 INT64_MIN #define MAX_INTEGER64 INT64_MAX // Backport macros added to R in 2017 so we don't need to update dependency from R 3.0.0 #ifndef MAYBE_REFERENCED # define MAYBE_REFERENCED(x) ( NAMED(x) > 0 ) #endif #ifndef ALTREP #define ALTREP(x) 0 // for R<3.5.0, see issue #2866 and grep for "ALTREP" to see comments where it's used #endif #define SEXPPTR(x) ((SEXP *)DATAPTR(x)) // to avoid overhead of looped STRING_ELT and VECTOR_ELT // init.c // https://stackoverflow.com/questions/1410563/what-is-the-difference-between-a-definition-and-a-declaration extern SEXP char_integer64; extern SEXP char_nanotime; extern SEXP char_factor; extern SEXP char_ordered; extern SEXP char_dataframe; extern SEXP char_datatable; extern SEXP char_sf; // not currently needed (base_radixsort uses install), but perhaps later.. extern SEXP sym_sorted; // extern SEXP sym_maxgrpn; // extern SEXP sym_starts; // extern SEXP char_starts; extern SEXP sym_index; extern SEXP sym_inherits; extern SEXP sym_sf_column; extern SEXP SelfRefSymbol; extern SEXP sym_datatable_locked; extern SEXP sym_collapse_DT_alloccol; long long DtoLL(double x); double LLtoD(long long x); extern double NA_INT64_D; extern long long NA_INT64_LL; extern Rcomplex NA_CPLX; // initialized in init.c; see there for comments // radixsort Must do Cradixsort, otherwise issue on mac // SEXP Cradixsort(SEXP NA_last, SEXP decreasing, SEXP RETstrt, SEXP RETgs, SEXP SORTStr, SEXP args); // void Cdoubleradixsort(int *o, bool NA_last, bool decreasing, SEXP x); // static void dsort(double *x, int *o, int n); // dogroups.c SEXP keepattr(SEXP to, SEXP from); SEXP growVector(SEXP x, R_len_t newlen); extern size_t sizes[100]; // max appears to be FUNSXP = 99, see Rinternals.h extern size_t typeorder[100]; // assign.c void writeNA(SEXP v, const int from, const int n); void savetl_init(), savetl(SEXP s), savetl_end(); SEXP setcolorder(SEXP x, SEXP o); // subset.c SEXP subsetDT(SEXP x, SEXP rows, SEXP cols, SEXP checkrows); SEXP subsetVector(SEXP x, SEXP idx, SEXP checkidx); SEXP anyNA(SEXP x, SEXP cols); // SEXP uniqlengths(SEXP x, SEXP n); SEXP Calloccol(SEXP dt); // , SEXP Rn // frank.c SEXP dt_na(SEXP x, SEXP cols); SEXP frankds(SEXP xorderArg, SEXP xstartArg, SEXP xlenArg, SEXP dns); // assign.c const char *memrecycle(SEXP target, SEXP where, int r, int len, SEXP source, int coln, const char *colname); // utils.c bool allNA(SEXP x, bool errorForBadType); SEXP allNAv(SEXP x, SEXP errorForBadType); bool INHERITS(SEXP x, SEXP char_); SEXP copyAsPlain(SEXP x); // quickselect // double dquickselect(double *x, int n); // double iquickselect(int *x, int n); // double i64quickselect(int64_t *x, int n); collapse/src/base_radixsort.c0000644000176200001440000020012514076614505016030 0ustar liggesusers/* * R : A Computer Language for Statistical Data Analysis * Copyright (C) 2016 The R Core Team * * Based on code donated from the data.table package * (C) 2006-2015 Matt Dowle and Arun Srinivasan. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, a copy is available at * https://www.R-project.org/Licenses/ */ #ifdef HAVE_CONFIG_H #include #endif #include "base_radixsort.h" // gs = groupsizes e.g.23, 12, 87, 2, 1, 34,... static int *gs[2] = { NULL }; //two vectors flip flopped:flip and 1 - flip static int flip = 0; //allocated stack size static int gsalloc[2] = { 0 }; static int gsngrp[2] = { 0 }; //max grpn so far static int gsmax[2] = { 0 }; //max size of stack, set by do_radixsort to nrows static int gsmaxalloc = 0; //switched off for last arg unless retGrp==TRUE static Rboolean stackgrps = TRUE; // TRUE for setkey, FALSE for by= static Rboolean sortStr = TRUE; // used by do_radixsort and [i|d|c]sort to reorder order. // not needed if narg==1 static int *newo = NULL; // =1, 0, -1 for TRUE, NA, FALSE respectively. // Value rewritten inside do_radixsort(). static int nalast = -1; // =1, -1 for ascending and descending order respectively static int order = 1; //replaced n < 200 with n < N_SMALL.Easier to change later #define N_SMALL 200 // range limit for counting sort. Should be less than INT_MAX // (see setRange for details) #define N_RANGE 100000 static SEXP *saveds = NULL; static R_len_t *savedtl = NULL, nalloc = 0, nsaved = 0; static void savetl_init() { if (nsaved || nalloc || saveds || savedtl) error("Internal error: savetl_init checks failed (%d %d %p %p).", nsaved, nalloc, saveds, savedtl); nsaved = 0; nalloc = 100; saveds = (SEXP *) malloc(nalloc * sizeof(SEXP)); if (saveds == NULL) error("Could not allocate saveds in savetl_init"); savedtl = (R_len_t *) malloc(nalloc * sizeof(R_len_t)); if (savedtl == NULL) { free(saveds); error("Could not allocate saveds in savetl_init"); } } static void savetl_end() { // Can get called if nothing has been saved yet (nsaved == 0), or // even if _init() has not been called yet (pointers NULL). Such as // to clear up before error. Also, it might be that nothing needed // to be saved anyway. for (int i = 0; i != nsaved; ++i) SET_TRLEN(saveds[i], savedtl[i]); free(saveds); // does nothing on NULL input free(savedtl); nsaved = nalloc = 0; saveds = NULL; savedtl = NULL; } static void savetl(SEXP s) { if (nsaved >= nalloc) { nalloc *= 2; char *tmp; tmp = (char *) realloc(saveds, nalloc * sizeof(SEXP)); if (tmp == NULL) { savetl_end(); error("Could not realloc saveds in savetl"); } saveds = (SEXP *) tmp; tmp = (char *) realloc(savedtl, nalloc * sizeof(R_len_t)); if (tmp == NULL) { savetl_end(); error("Could not realloc savedtl in savetl"); } savedtl = (R_len_t *) tmp; } saveds[nsaved] = s; savedtl[nsaved] = TRLEN(s); nsaved++; } // http://gcc.gnu.org/onlinedocs/cpp/Swallowing-the-Semicolon.html#Swallowing-the-Semicolon #define Error(...) do {savetl_end(); error(__VA_ARGS__);} while(0) #undef warning // since it can be turned to error via warn = 2 #define warning(...) Do not use warning in this file /* use malloc/realloc (not Calloc/Realloc) so we can trap errors and call savetl_end() before the error(). */ static void growstack(uint64_t newlen) { // no link to icount range restriction, // just 100,000 seems a good minimum at 0.4MB if (newlen == 0) newlen = 100000; if (newlen > gsmaxalloc) newlen = gsmaxalloc; gs[flip] = realloc(gs[flip], newlen * sizeof(int)); if (gs[flip] == NULL) Error("Failed to realloc working memory stack to %d*4bytes (flip=%d)", (int)newlen /* no bigger than gsmaxalloc */, flip); gsalloc[flip] = (int)newlen; } static void push(int x) { if (!stackgrps || x == 0) return; if (gsalloc[flip] == gsngrp[flip]) growstack((uint64_t)(gsngrp[flip]) * 2); gs[flip][gsngrp[flip]++] = x; if (x > gsmax[flip]) gsmax[flip] = x; } static void mpush(int x, int n) { if (!stackgrps || x == 0) return; if (gsalloc[flip] < gsngrp[flip] + n) growstack(((uint64_t)(gsngrp[flip]) + n) * 2); for (int i = 0; i != n; ++i) gs[flip][gsngrp[flip]++] = x; if (x > gsmax[flip]) gsmax[flip] = x; } static void flipflop() { flip = 1 - flip; gsngrp[flip] = 0; gsmax[flip] = 0; if (gsalloc[flip] < gsalloc[1 - flip]) growstack((uint64_t)(gsalloc[1 - flip]) * 2); } static void gsfree() { free(gs[0]); free(gs[1]); gs[0] = NULL; gs[1] = NULL; flip = 0; gsalloc[0] = gsalloc[1] = 0; gsngrp[0] = gsngrp[1] = 0; gsmax[0] = gsmax[1] = 0; gsmaxalloc = 0; } #ifdef TIMING_ON // many calls to clock() can be expensive, // hence compiled out rather than switch(verbose) #include #define NBLOCK 20 static clock_t tblock[NBLOCK], tstart; static int nblock[NBLOCK]; #define TBEG() tstart = clock(); #define TEND(i) tblock[i] += clock()-tstart; nblock[i]++; tstart = clock(); #else #define TBEG() #define TEND(i) #endif static int range, xmin; // used by both icount and do_radixsort static void setRange(int *x, int n) { xmin = NA_INTEGER; int xmax = NA_INTEGER; double overflow; int i = 0; while(i < n && x[i] == NA_INTEGER) i++; if (i < n) xmax = xmin = x[i]; for (; i != n; ++i) { int tmp = x[i]; if (tmp == NA_INTEGER) continue; if (tmp > xmax) xmax = tmp; else if (tmp < xmin) xmin = tmp; } // all NAs, nothing to do if (xmin == NA_INTEGER) { range = NA_INTEGER; return; } // ex: x=c(-2147483647L, NA_integer_, 1L) results in overflowing int range. overflow = (double) xmax - (double) xmin + 1; // detect and force iradix here, since icount is out of the picture if (overflow > INT_MAX) { range = INT_MAX; return; } range = xmax - xmin + 1; return; } // x*order results in integer overflow when -1*NA, // so careful to avoid that here : static inline int icheck(int x) { // if nalast == 1, NAs must go last. return ((nalast != 1) ? ((x != NA_INTEGER) ? x*order : x) : ((x != NA_INTEGER) ? (x*order) - 1 : INT_MAX)); } static void icount(int *x, int *o, int n) /* Counting sort: 1. Places the ordering into o directly, overwriting whatever was there 2. Doesn't change x 3. Pushes group sizes onto stack */ { int napos = range; // NA's always counted in last bin // static is IMPORTANT, counting sort is called repetitively. static unsigned int counts[N_RANGE + 1] = { 0 }; /* counts are set back to 0 at the end efficiently. 1e5 = 0.4MB i.e tiny. We'll only use the front part of it, as large as range. So it's just reserving space, not using it. Have defined N_RANGE to be 100000.*/ if (range > N_RANGE) Error("Internal error: range = %d; isorted cannot handle range > %d", range, N_RANGE); for (int i = 0; i != n; ++i) { // For nalast=NA case, we won't remove/skip NAs, rather set 'o' indices // to 0. subset will skip them. We can't know how many NAs to skip // beforehand - i.e. while allocating "ans" vector if (x[i] == NA_INTEGER) counts[napos]++; else counts[x[i] - xmin]++; } int tmp = 0; if (nalast != 1 && counts[napos]) { push(counts[napos]); tmp += counts[napos]; } int w = (order==1) ? 0 : range-1; for (int i = 0; i != range; ++i) /* no point in adding tmp < n && i <= range, since range includes max, need to go to max, unlike 256 loops elsewhere in radixsort.c */ { if (counts[w]) { // cumulate but not through 0's. // Helps resetting zeros when n < range, below. push(counts[w]); counts[w] = (tmp += counts[w]); } w += order; // order is +1 or -1 } if (nalast == 1 && counts[napos]) { push(counts[napos]); counts[napos] = (tmp += counts[napos]); } for (int i = n - 1; i >= 0; i--) { // This way na.last=TRUE/FALSE cases will have just a // single if-check overhead. o[--counts[(x[i] == NA_INTEGER) ? napos : x[i] - xmin]] = (int) (i + 1); } // nalast = 1, -1 are both taken care already. if (nalast == 0) // nalast = 0 is dealt with separately as it just sets o to 0 for (int i = 0; i != n; ++i) o[i] = (x[o[i] - 1] == NA_INTEGER) ? 0 : o[i]; // at those indices where x is NA. x[o[i]-1] because x is not modifed here. /* counts were cumulated above so leaves non zero. Faster to clear up now ready for next time. */ if (n < range) { /* Many zeros in counts already. Loop through n instead, doesn't matter if we set to 0 several times on any repeats */ counts[napos] = 0; for (int i = 0; i != n; ++i) { if (x[i] != NA_INTEGER) counts[x[i] - xmin] = 0; } } else memset(counts, 0, (range + 1) * sizeof(int)); return; } static void iinsert(int *x, int *o, int n) /* orders both x and o by reference in-place. Fast for small vectors, low overhead. don't be tempted to binsearch backwards here, have to shift anyway; many memmove would have overhead and do the same thing. */ /* when nalast == 0, iinsert will be called only from within iradix, where o[.] = 0 for x[.]=NA is already taken care of */ { for (int i = 1; i != n; ++i) { int xtmp = x[i]; if (xtmp < x[i - 1]) { int j = i - 1; int otmp = o[i]; while (j >= 0 && xtmp < x[j]) { x[j + 1] = x[j]; o[j + 1] = o[j]; j--; } x[j + 1] = xtmp; o[j + 1] = otmp; } } int tt = 0; for (int i = 1; i != n; ++i) { if (x[i] == x[i - 1]) tt++; else { push(tt + 1); tt = 0; } } push(tt + 1); // INCLUDED ?? } /* iradix is a counting sort performed forwards from MSB to LSB, with some tricks and short circuits building on Terdiman and Herf. http://codercorner.com/RadixSortRevisited.htm http://stereopsis.com/radix.html ~ Note they are LSD, but we do MSD here which is more complicated, for efficiency. ~ NAs need no special treatment as NA is the most negative integer in R (checked in init.c once, for efficiency) so NA naturally sort to the front. ~ Using 4-pass 1-byte radix for the following reasons : * 11-bit (Herf) reduces to 3-passes (3*11=33) yes, and LSD need random access to o vector in each pass 1:n so reduction in passes is good, but Terdiman's idea to skip a radix if all values are equal occurs less the wider the radix. A narrower radix benefits more from that. * That's detected here using a single 'if', an improvement on Terdiman's exposition of a single loop to find if any count==n * The pass through counts bites when radix is wider, because we repetitively call this iradix from fastorder forwards. * Herf's parallel histogramming is neat. In 4-pass 1-byte it needs 4*256 storage, that's tiny, and can be static. 4*256 << 3*2048. 4-pass 1-byte is simpler and tighter code than 3-pass 11-bit, giving modern optimizers and modern CPUs a better chance. We may get lucky anyway, if one or two of the 4-passes are skipped. Recall: there are no comparisons at all in counting and radix, there is wide random access in each LSD radix pass, though. */ // 4 are used for iradix, 8 for dradix and i64radix static unsigned int radixcounts[8][257] = { {0} }; static int skip[8]; /* global because iradix and iradix_r interact and are called repetitively. counts are set back to 0 after each use, to benefit from skipped radix. */ static void *radix_xsub = NULL; static size_t radix_xsuballoc = 0; static int *otmp = NULL, otmp_alloc = 0; static void alloc_otmp(int n) { if (otmp_alloc >= n) return; otmp = (int *) realloc(otmp, n * sizeof(int)); if (otmp == NULL) Error("Failed to allocate working memory for otmp. Requested %d * %d bytes", n, sizeof(int)); otmp_alloc = n; } // TO DO: save xtmp if possible, see allocs in do_radixsort static void *xtmp = NULL; static int xtmp_alloc = 0; // TO DO: currently always the largest type (double) but // could be int if that's all that's needed static void alloc_xtmp(int n) { if (xtmp_alloc >= n) return; xtmp = (double *) realloc(xtmp, n * sizeof(double)); if (xtmp == NULL) Error("Failed to allocate working memory for xtmp. Requested %d * %d bytes", n, sizeof(double)); xtmp_alloc = n; } static void iradix_r(int *xsub, int *osub, int n, int radix); static void iradix(int *x, int *o, int n) /* As icount : Places the ordering into o directly, overwriting whatever was there Doesn't change x Pushes group sizes onto stack */ { int nextradix, itmp, thisgrpn, maxgrpn; unsigned int thisx = 0, shift, *thiscounts; for (int i = 0; i != n; ++i) { /* parallel histogramming pass; i.e. count occurrences of 0:255 in each byte. Sequential so almost negligible. */ // relies on overflow behaviour. And shouldn't -INT_MIN be up in iradix? thisx = (unsigned int) (icheck(x[i])) - INT_MIN; // unrolled since inside n-loop radixcounts[0][thisx & 0xFF]++; radixcounts[1][thisx >> 8 & 0xFF]++; radixcounts[2][thisx >> 16 & 0xFF]++; radixcounts[3][thisx >> 24 & 0xFF]++; } for (int radix = 0; radix < 4; radix++) { /* any(count == n) => all radix must have been that value => last x (still thisx) was that value */ int i = thisx >> (radix*8) & 0xFF; skip[radix] = radixcounts[radix][i] == n; // clear it now, the other counts must be 0 already if (skip[radix]) radixcounts[radix][i] = 0; } int radix = 3; // MSD while (radix >= 0 && skip[radix]) radix--; if (radix == -1) { // All radix are skipped; one number repeated n times. if (nalast == 0 && x[0] == NA_INTEGER) // all values are identical. return 0 if nalast=0 & all NA // because of 'return', have to take care of it here. for (int i = 0; i != n; ++i) o[i] = 0; else for (int i = 0; i != n; ++i) o[i] = (i + 1); push(n); return; } for (int i = radix - 1; i >= 0; i--) { if (!skip[i]) memset(radixcounts[i], 0, 257 * sizeof(unsigned int)); /* clear the counts as we only needed the parallel pass for skip[] and we're going to use radixcounts again below. Can't use parallel lower counts in MSD radix, unlike LSD. */ } thiscounts = radixcounts[radix]; shift = radix * 8; itmp = thiscounts[0]; maxgrpn = itmp; for (int i = 1; itmp < n && i < 256; ++i) { thisgrpn = thiscounts[i]; if (thisgrpn) { // don't cummulate through 0s, important below. if (thisgrpn > maxgrpn) maxgrpn = thisgrpn; thiscounts[i] = (itmp += thisgrpn); } } for (int i = n - 1; i >= 0; i--) { thisx = ((unsigned int) (icheck(x[i])) - INT_MIN) >> shift & 0xFF; o[--thiscounts[thisx]] = i + 1; } if (radix_xsuballoc < maxgrpn) { // The largest group according to the first non-skipped radix, // so could be big (if radix is needed on first arg) // TO DO: could include extra bits to divide the first radix // up more. Often the MSD has groups in just 0-4 out of 256. // free'd at the end of do_radixsort once we're done calling iradix // repetitively radix_xsub = (int *) realloc(radix_xsub, maxgrpn * sizeof(double)); if (!radix_xsub) Error("Failed to realloc working memory %d*8bytes (xsub in iradix), radix=%d", maxgrpn, radix); radix_xsuballoc = maxgrpn; } // TO DO: can we leave this to do_radixsort and remove these calls?? alloc_otmp(maxgrpn); // TO DO: doesn't need to be sizeof(double) always, see inside alloc_xtmp(maxgrpn); nextradix = radix - 1; while (nextradix >= 0 && skip[nextradix]) nextradix--; if (thiscounts[0] != 0) Error("Internal error. thiscounts[0]=%d but should have been decremented to 0. dradix=%d", thiscounts[0], radix); thiscounts[256] = n; itmp = 0; for (int i = 1; itmp < n && i <= 256; ++i) { if (thiscounts[i] == 0) continue; // undo cumulate; i.e. diff thisgrpn = thiscounts[i] - itmp; if (thisgrpn == 1 || nextradix == -1) { push(thisgrpn); } else { for (int j = 0; j != thisgrpn; ++j) // this is why this xsub here can't be the same memory as // xsub in do_radixsort. ((int *)radix_xsub)[j] = icheck(x[o[itmp+j]-1]); // changes xsub and o by reference recursively. iradix_r(radix_xsub, o+itmp, thisgrpn, nextradix); } itmp = thiscounts[i]; thiscounts[i] = 0; } if (nalast == 0) // nalast = 1, -1 are both taken care already. // nalast = 0 is dealt with separately as it just sets o to 0 for (int i = 0; i != n; ++i) o[i] = (x[o[i] - 1] == NA_INTEGER) ? 0 : o[i]; // at those indices where x is NA. x[o[i]-1] because x is not // modified by reference unlike iinsert or iradix_r } static void iradix_r(int *xsub, int *osub, int n, int radix) // xsub is a recursive offset into xsub working memory above in // iradix, reordered by reference. osub is a an offset into the main // answer o, reordered by reference. radix iterates 3,2,1,0 { int j, itmp, thisx, thisgrpn, nextradix, shift; unsigned int *thiscounts; // N_SMALL=200 is guess based on limited testing. Needs // calibrate(). Was 50 based on sum(1:50)=1275 worst -vs- 256 // cummulate + 256 memset + allowance since reverse order is // unlikely. when nalast==0, iinsert will be called only from // within iradix. if (n < N_SMALL) { iinsert(xsub, osub, n); return; } shift = radix * 8; thiscounts = radixcounts[radix]; for (int i = 0; i != n; ++i) { thisx = (unsigned int) xsub[i] - INT_MIN; // sequential in xsub thiscounts[thisx >> shift & 0xFF]++; } itmp = thiscounts[0]; for (int i = 1; itmp < n && i < 256; ++i) { if (thiscounts[i]) // don't cummulate through 0s, important below thiscounts[i] = (itmp += thiscounts[i]); } // INCLUDED ?? for (int i = n - 1; i >= 0; i--) { thisx = ((unsigned int) xsub[i] - INT_MIN) >> shift & 0xFF; j = --thiscounts[thisx]; otmp[j] = osub[i]; ((int *) xtmp)[j] = xsub[i]; } memcpy(osub, otmp, n * sizeof(int)); memcpy(xsub, xtmp, n * sizeof(int)); nextradix = radix - 1; while (nextradix >= 0 && skip[nextradix]) nextradix--; /* TO DO: If nextradix == -1 AND no further args from do_radixsort AND !retGrp, we're done. We have o. Remember to memset thiscounts before returning. */ if (thiscounts[0] != 0) Error("Logical error. thiscounts[0]=%d but should have been decremented to 0. radix=%d", thiscounts[0], radix); thiscounts[256] = n; itmp = 0; for (int i = 1; itmp < n && i <= 256; ++i) { if (thiscounts[i] == 0) continue; thisgrpn = thiscounts[i] - itmp; // undo cummulate; i.e. diff if (thisgrpn == 1 || nextradix == -1) { push(thisgrpn); } else { iradix_r(xsub+itmp, osub+itmp, thisgrpn, nextradix); } itmp = thiscounts[i]; thiscounts[i] = 0; } } // dradix from Arun's fastradixdouble.c // + changed to MSD and hooked into do_radixsort framework here. // + replaced tolerance with rounding s.f. static unsigned long long dmask1; static unsigned long long dmask2; static void setNumericRounding(int dround) { dmask1 = dround ? 1 << (8 * dround - 1) : 0; dmask2 = 0xffffffffffffffff << dround * 8; } static union { double d; unsigned long long ull; } u; static unsigned long long dtwiddle(void *p, int i, int order) { u.d = order * ((double *)p)[i]; // take care of 'order' at the beginning if (R_FINITE(u.d)) { u.ull = (u.d != 0.0) ? u.ull + ((u.ull & dmask1) << 1) : 0; } else if (ISNAN(u.d)) { u.ull = 0; return (nalast == 1 ? ~u.ull : u.ull); } unsigned long long mask = (u.ull & 0x8000000000000000) ? // always flip sign bit and if negative (sign bit was set) // flip other bits too 0xffffffffffffffff : 0x8000000000000000; return ((u.ull ^ mask) & dmask2); } static Rboolean dnan(void *p, int i) { u.d = ((double *) p)[i]; return (ISNAN(u.d)); } static unsigned long long (*twiddle) (void *, int, int); static Rboolean(*is_nan) (void *, int); // the size of the arg type (4 or 8). Just 8 currently until iradix is // merged in. static size_t colSize = 8; static void dradix_r(unsigned char *xsub, int *osub, int n, int radix); #ifdef WORDS_BIGENDIAN #define RADIX_BYTE colSize - radix - 1 #else #define RADIX_BYTE radix #endif static void dradix(unsigned char *x, int *o, int n) { int radix, nextradix, itmp, thisgrpn, maxgrpn; unsigned int *thiscounts; unsigned long long thisx = 0; // see comments in iradix for structure. This follows the same. // TO DO: merge iradix in here (almost ready) for (int i = 0; i != n; ++i) { thisx = twiddle(x, i, order); for (radix = 0; radix != colSize; ++radix) // if dround == 2 then radix 0 and 1 will be all 0 here and skipped. /* on little endian, 0 is the least significant bits (the right) and 7 is the most including sign (the left); i.e. reversed. */ radixcounts[radix][((unsigned char *)&thisx)[RADIX_BYTE]]++; } for (radix = 0; radix != colSize; ++radix) { // thisx is the last x after loop above int i = ((unsigned char *) &thisx)[RADIX_BYTE]; skip[radix] = radixcounts[radix][i] == n; // clear it now, the other counts must be 0 already if (skip[radix]) radixcounts[radix][i] = 0; } radix = (int) colSize - 1; // MSD while (radix >= 0 && skip[radix]) radix--; if (radix == -1) { // All radix are skipped; i.e. one number repeated n times. if (nalast == 0 && is_nan(x, 0)) // all values are identical. return 0 if nalast=0 & all NA // because of 'return', have to take care of it here. for (int i = 0; i != n; ++i) o[i] = 0; else for (int i = 0; i != n; ++i) o[i] = (i + 1); push(n); return; } for (int i = radix - 1; i >= 0; i--) { // clear the lower radix counts, we only did them to know // skip. will be reused within each group if (!skip[i]) memset(radixcounts[i], 0, 257 * sizeof(unsigned int)); } thiscounts = radixcounts[radix]; itmp = thiscounts[0]; maxgrpn = itmp; for (int i = 1; itmp < n && i < 256; ++i) { thisgrpn = thiscounts[i]; if (thisgrpn) { // don't cummulate through 0s, important below if (thisgrpn > maxgrpn) maxgrpn = thisgrpn; thiscounts[i] = (itmp += thisgrpn); } } for (int i = n - 1; i >= 0; i--) { thisx = twiddle(x, i, order); o[ --thiscounts[((unsigned char *)&thisx)[RADIX_BYTE]] ] = i + 1; } if (radix_xsuballoc < maxgrpn) { // TO DO: centralize this alloc // The largest group according to the first non-skipped radix, // so could be big (if radix is needed on first arg) TO DO: // could include extra bits to divide the first radix up // more. Often the MSD has groups in just 0-4 out of 256. // free'd at the end of do_radixsort once we're done calling iradix // repetitively radix_xsub = (double *) realloc(radix_xsub, maxgrpn * sizeof(double)); if (!radix_xsub) Error("Failed to realloc working memory %d*8bytes (xsub in dradix), radix=%d", maxgrpn, radix); radix_xsuballoc = maxgrpn; } alloc_otmp(maxgrpn); // TO DO: leave to do_radixsort and remove these? alloc_xtmp(maxgrpn); nextradix = radix - 1; while (nextradix >= 0 && skip[nextradix]) nextradix--; if (thiscounts[0] != 0) Error("Logical error. thiscounts[0]=%d but should have been decremented to 0. dradix=%d", thiscounts[0], radix); thiscounts[256] = n; itmp = 0; for (int i = 1; itmp < n && i <= 256; ++i) { if (thiscounts[i] == 0) continue; thisgrpn = thiscounts[i] - itmp; // undo cummulate; i.e. diff if (thisgrpn == 1 || nextradix == -1) { push(thisgrpn); } else { if (colSize == 4) { // ready for merging in iradix ... error("Not yet used, still using iradix instead"); for (int j = 0; j != thisgrpn; ++j) ((int *)radix_xsub)[j] = (int)twiddle(x, o[itmp+j]-1, order); // this is why this xsub here can't be the same memory // as xsub in do_radixsort } else for (int j = 0; j != thisgrpn; ++j) ((unsigned long long *)radix_xsub)[j] = twiddle(x, o[itmp+j]-1, order); // changes xsub and o by reference recursively. dradix_r(radix_xsub, o+itmp, thisgrpn, nextradix); } itmp = thiscounts[i]; thiscounts[i] = 0; } if (nalast == 0) // nalast = 1, -1 are both taken care already. for (int i = 0; i != n; ++i) o[i] = is_nan(x, o[i] - 1) ? 0 : o[i]; // nalast = 0 is dealt with separately as it just sets o to 0 // at those indices where x is NA. x[o[i]-1] because x is not // modified by reference unlike iinsert or iradix_r } static void dinsert(unsigned long long *x, int *o, int n) // orders both x and o by reference in-place. Fast for small vectors, // low overhead. don't be tempted to binsearch backwards here, have // to shift anyway; many memmove would have overhead and do the same // thing 'dinsert' will not be called when nalast = 0 and o[0] = -1. { int otmp, tt; unsigned long long xtmp; for (int i = 1; i != n; ++i) { xtmp = x[i]; if (xtmp < x[i - 1]) { int j = i - 1; otmp = o[i]; while (j >= 0 && xtmp < x[j]) { x[j + 1] = x[j]; o[j + 1] = o[j]; j--; } x[j + 1] = xtmp; o[j + 1] = otmp; } } tt = 0; for (int i = 1; i != n; ++i) { if (x[i] == x[i - 1]) tt++; else { push(tt + 1); tt = 0; } } // INCLUDED ?? push(tt + 1); } static void dradix_r(unsigned char *xsub, int *osub, int n, int radix) /* xsub is a recursive offset into xsub working memory above in dradix, reordered by reference. osub is a an offset into the main answer o, reordered by reference. dradix iterates 7,6,5,4,3,2,1,0 */ { int itmp, thisgrpn, nextradix; unsigned int *thiscounts; unsigned char *p; if (n < 200) { /* 200 is guess based on limited testing. Needs calibrate(). Was 50 based on sum(1:50)=1275 worst -vs- 256 cummulate + 256 memset + allowance since reverse order is unlikely */ // order=1 here because it's already taken care of in iradix dinsert((void *)xsub, osub, n); return; } thiscounts = radixcounts[radix]; p = xsub + RADIX_BYTE; for (int i = 0; i != n; ++i) { thiscounts[*p]++; p += colSize; } itmp = thiscounts[0]; for (int i = 1; itmp < n && i < 256; ++i) { if (thiscounts[i]) // don't cummulate through 0s, important below thiscounts[i] = (itmp += thiscounts[i]); } // INCLUDED ?? p = xsub + (n - 1) * colSize; if (colSize == 4) { error("Not yet used, still using iradix instead"); for (int i = n - 1; i >= 0; i--) { int j = --thiscounts[*(p + RADIX_BYTE)]; otmp[j] = osub[i]; ((int *) xtmp)[j] = *(int *) p; p -= colSize; } } else { for (int i = n - 1; i >= 0; i--) { int j = --thiscounts[*(p + RADIX_BYTE)]; otmp[j] = osub[i]; ((unsigned long long *) xtmp)[j] = *(unsigned long long *) p; p -= colSize; } } memcpy(osub, otmp, n * sizeof(int)); memcpy(xsub, xtmp, n * colSize); nextradix = radix - 1; while (nextradix >= 0 && skip[nextradix]) nextradix--; // TO DO: If nextradix==-1 and no further args from do_radixsort, // we're done. We have o. Remember to memset thiscounts before // returning. if (thiscounts[0] != 0) Error("Logical error. thiscounts[0]=%d but should have been decremented to 0. radix=%d", thiscounts[0], radix); thiscounts[256] = n; itmp = 0; for (int i = 1; itmp < n && i <= 256; ++i) { if (thiscounts[i] == 0) continue; thisgrpn = thiscounts[i] - itmp; // undo cummulate; i.e. diff if (thisgrpn == 1 || nextradix == -1) push(thisgrpn); else dradix_r(xsub + itmp * colSize, osub + itmp, thisgrpn, nextradix); itmp = thiscounts[i]; thiscounts[i] = 0; } } // TO DO?: dcount. Find step size, then range = (max-min)/step and // proceed as icount. Many fixed precision floats (such as prices) may // be suitable. Fixed precision such as 1.10, 1.15, 1.20, 1.25, 1.30 // ... do use all bits so dradix skipping may not help. static int *cradix_counts = NULL; static int cradix_counts_alloc = 0; static int maxlen = 1; static SEXP *cradix_xtmp = NULL; static int cradix_xtmp_alloc = 0; // same as StrCmp but also takes into account 'decreasing' and 'na.last' args. static int StrCmp2(SEXP x, SEXP y) { // same cached pointer (including NA_STRING == NA_STRING) if (x == y) return 0; // if x=NA, nalast=1 ? then x > y else x < y (Note: nalast == 0 is // already taken care of in 'csorted', won't be 0 here) if (x == NA_STRING) return nalast; if (y == NA_STRING) return -nalast; // if y=NA, nalast=1 ? then y > x return order*strcmp(CHAR(x), CHAR(y)); // same as explanation in StrCmp } static int StrCmp(SEXP x, SEXP y) // also used by bmerge and chmatch { // same cached pointer (including NA_STRING == NA_STRING) if (x == y) return 0; if (x == NA_STRING) return -1; // x < y if (y == NA_STRING) return 1; // x > y // assumes strings are in same encoding return strcmp(CHAR(x), CHAR(y)); } #define CHAR_ENCODING(x) (IS_ASCII(x) ? CE_UTF8 : getCharCE(x)) static void checkEncodings(SEXP x) { cetype_t ce; SEXP *px = STRING_PTR(x); int i, lx = length(x); for (i = 0; i != lx && px[i] == NA_STRING; ++i); if (i < lx) { ce = CHAR_ENCODING(px[i]); if (ce == CE_NATIVE) { error("Character encoding must be UTF-8, Latin-1 or bytes"); } } /* Disabled for now -- doubles the time (for already sorted vectors): why? for (int i = 1; i < length(x); i++) { if (ce != CHAR_ENCODING(STRING_ELT(x, i))) { error("Mixed character encodings are not supported"); } } */ } static void cradix_r(SEXP * xsub, int n, int radix) // xsub is a unique set of CHARSXP, to be ordered by reference // First time, radix == 0, and xsub == x. Then recursively moves SEXP together // for L1 cache efficiency. // Quite different to iradix because // 1) x is known to be unique so fits in cache // (wide random access not an issue) // 2) they're variable length character strings // 3) no need to maintain o. Just simply reorder x. No grps or push. // Fortunately, UTF sorts in the same order if treated as ASCII, so we // can simplify by doing it by bytes. // TO DO: confirm a forwards (MSD) radix for efficiency, although more // complicated. // This part has nothing to do with truelength. The // truelength stuff is to do with finding the unique strings. We may // be able to improve CHARSXP derefencing by submitting patch to R to // make R's string cache contiguous but would likely be difficult. If // we strxfrm, then it'll then be contiguous and compact then anyway. { int itmp, *thiscounts, thisgrpn=0, thisx=0; SEXP stmp; // TO DO?: chmatch to existing sorted vector, then grow it. // TO DO?: if (n= 0; i--) { thisx = xsub[i] == NA_STRING ? 0 : (radix < LENGTH(xsub[i]) ? (unsigned char) (CHAR(xsub[i])[radix]) : 1); int j = --thiscounts[thisx]; cradix_xtmp[j] = xsub[i]; } memcpy(xsub, cradix_xtmp, n * sizeof(SEXP)); if (radix == maxlen - 1) { memset(thiscounts, 0, 256 * sizeof(int)); return; } if (thiscounts[0] != 0) Error("Logical error. counts[0]=%d in cradix but should have been decremented to 0. radix=%d", thiscounts[0], radix); itmp = 0; for (int i = 1; i != 256; ++i) { if (thiscounts[i] == 0) continue; thisgrpn = thiscounts[i] - itmp; // undo cummulate; i.e. diff cradix_r(xsub + itmp, thisgrpn, radix + 1); itmp = thiscounts[i]; // set to 0 now since we're here, saves memset // afterwards. Important to clear! Also more portable for // machines where 0 isn't all bits 0 (?!) thiscounts[i] = 0; } if (itmp < n - 1) cradix_r(xsub + itmp, n - itmp, radix + 1); // final group } static SEXP *ustr = NULL; static int ustr_alloc = 0, ustr_n = 0; static void cgroup(SEXP * x, int *o, int n) // As icount : // Places the ordering into o directly, overwriting whatever was there // Doesn't change x // Pushes group sizes onto stack // Only run when sortStr == FALSE. Basically a counting sort, in first // appearance order, directly. Since it doesn't sort the strings, the // name is cgroup. there is no _pre for this. ustr created and // cleared each time. { // savetl_init() is called once at the start of do_radixsort if (ustr_n != 0) Error ("Internal error. ustr isn't empty when starting cgroup: ustr_n=%d, ustr_alloc=%d", ustr_n, ustr_alloc); for (int i = 0; i != n; ++i) { SEXP s = x[i]; if (TRLEN(s) < 0) { // this case first as it's the most frequent SET_TRLEN(s, TRLEN(s) - 1); // use negative counts so as to detect R's own (positive) // usage of tl on CHARSXP continue; } if (TRLEN(s) > 0) { // Save any of R's own usage of tl (assumed positive, so // we can both count and save in one scan), to restore // afterwards. From R 2.14.0, tl is initialized to 0, // prior to that it was random so this step saved too much. savetl(s); SET_TRLEN(s, 0); } if (ustr_alloc <= ustr_n) { // 10000 = 78k of 8byte pointers. Small initial guess, // negligible time to alloc. ustr_alloc = (ustr_alloc == 0) ? 10000 : ustr_alloc*2; if (ustr_alloc > n) ustr_alloc = n; ustr = realloc(ustr, ustr_alloc * sizeof(SEXP)); if (ustr == NULL) Error("Unable to realloc %d * %d bytes in cgroup", ustr_alloc, sizeof(SEXP)); } SET_TRLEN(s, -1); ustr[ustr_n++] = s; } // TO DO: the same string in different encodings will be // considered different here. Sweep through ustr and merge counts // where equal (sort needed therefore, unfortunately?, only if // there are any marked encodings present) int cumsum = 0; for (int i = 0; i != ustr_n; ++i) { // 0.000 push(-TRLEN(ustr[i])); SET_TRLEN(ustr[i], cumsum += -TRLEN(ustr[i])); } int *target = (o[0] != -1) ? newo : o; for (int i = n - 1; i >= 0; i--) { SEXP s = x[i]; // 0.400 (page fetches on string cache) int k = TRLEN(s) - 1; SET_TRLEN(s, k); target[k] = i + 1; // 0.800 (random access to o) } // The cummulate meant counts are left non zero, so reset for next // time (0.00s). for (int i = 0; i != ustr_n; ++i) SET_TRLEN(ustr[i], 0); ustr_n = 0; } static int *csort_otmp = NULL, csort_otmp_alloc = 0; static void alloc_csort_otmp(int n) { if (csort_otmp_alloc >= n) return; csort_otmp = (int *) realloc(csort_otmp, n * sizeof(int)); if (csort_otmp == NULL) Error ("Failed to allocate working memory for csort_otmp. Requested %d * %d bytes", n, sizeof(int)); csort_otmp_alloc = n; } static void csort(SEXP * x, int *o, int n) /* As icount : Places the ordering into o directly, overwriting whatever was there Doesn't change x Pushes group sizes onto stack Requires csort_pre() to have created and sorted ustr already */ { /* can't use otmp, since iradix might be called here and that uses otmp (and xtmp). alloc_csort_otmp(n) is called from do_radixsort for either n=nrow if 1st arg, or n=maxgrpn if onwards args */ for (int i = 0; i != n; ++i) csort_otmp[i] = (x[i] == NA_STRING) ? NA_INTEGER : -TRLEN(x[i]); if (nalast == 0 && n == 2) { // special case for nalast == 0. n == 1 is handled inside // do_radixsort. at least 1 will be NA here else use o from caller // directly (not 1st arg) if (o[0] == -1) for (int i = 0; i != n; ++i) o[i] = i + 1; for (int i = 0; i != n; ++i) { if (csort_otmp[i] == NA_INTEGER) o[i] = 0; } // INCLUDED ?? push(1); push(1); return; } if (n < N_SMALL && nalast != 0) { // TO DO: calibrate() N_SMALL=200 if (o[0] == -1) for (int i = 0; i != n; ++i) o[i] = i + 1; // else use o from caller directly (not 1st arg) for (int i = 0; i != n; ++i) csort_otmp[i] = icheck(csort_otmp[i]); iinsert(csort_otmp, o, n); } else { setRange(csort_otmp, n); if (range == NA_INTEGER) Error("Internal error. csort's otmp contains all-NA"); int *target = (o[0] != -1) ? newo : o; if (range <= N_RANGE) // TO DO: calibrate(). radix was faster (9.2s // "range<=10000" instead of 11.6s "range<=N_RANGE && // range 0) { savetl(s); SET_TRLEN(s, 0); } if (ustr_alloc <= ustr_n) { // 10000 = 78k of 8byte pointers. Small initial guess, // negligible time to alloc. ustr_alloc = (ustr_alloc == 0) ? 10000 : ustr_alloc*2; if (ustr_alloc > old_un+n) ustr_alloc = old_un + n; ustr = realloc(ustr, ustr_alloc * sizeof(SEXP)); if (ustr == NULL) Error("Failed to realloc ustr. Requested %d * %d bytes", ustr_alloc, sizeof(SEXP)); } SET_TRLEN(s, -1); // this -1 will become its ordering later below ustr[ustr_n++] = s; // length on CHARSXP is the nchar of char * (excluding \0), // and treats marked encodings as if ascii. if (s != NA_STRING && LENGTH(s) > maxlen) maxlen = LENGTH(s); } new_un = ustr_n; if (new_un == old_un) return; // No new strings observed, seen them all before in previous // arg. ustr already sufficient. If we ever make ustr // permanently held by data.table, we'll just need to make the // final loop to set -i-1 before returning here. sort ustr. // TODO: just sort new ones and merge them in. These allocs are // here, to save them being in the recursive cradix_r() if (cradix_counts_alloc < maxlen) { cradix_counts_alloc = maxlen + 10; // +10 to save too many reallocs cradix_counts = (int *)realloc(cradix_counts, cradix_counts_alloc * 256 * sizeof(int)); if (!cradix_counts) Error("Failed to alloc cradix_counts"); memset(cradix_counts, 0, cradix_counts_alloc * 256 * sizeof(int)); } if (cradix_xtmp_alloc < ustr_n) { cradix_xtmp = (SEXP *) realloc(cradix_xtmp, ustr_n * sizeof(SEXP)); // TO DO: Reuse the one we have in do_radixsort. // Does it need to be n length? if (!cradix_xtmp) Error("Failed to alloc cradix_tmp"); cradix_xtmp_alloc = ustr_n; } // sorts ustr in-place by reference save ordering in the // CHARSXP. negative so as to distinguish with R's own usage. cradix_r(ustr, ustr_n, 0); for (int i = 0; i != ustr_n; ++i) SET_TRLEN(ustr[i], -i - 1); } // functions to test vectors for sortedness: isorted, dsorted and csorted // base:is.unsorted returns NA in the presence of any NA, but we need // to consider na.last, and we also return -1 if x is sorted in // _strictly_ reverse order; a common case we optimize. If a vector // is in decreasing order *with ties*, then an in-place reverse (no // sort) would result in instability of ties, so we are strict. We // also save grouping information during the check; that information // is required when sorting by multiple arguments. // TO DO: test in big steps first to return faster if unsortedness is // at the end (a common case of rbind'ing data to end) These are all // sequential access to x, so very quick and cache efficient. // order = 1 is ascending and order=-1 is descending; also takes care // of na.last argument with check through 'icheck' Relies on // NA_INTEGER == INT_MIN, checked in init.c static int isorted(int *x, int n) { int i = 1, j = 0; // when nalast = NA, // all NAs ? return special value to replace all o's values with '0' // any NAs ? return 0 = unsorted and leave it // to sort routines to replace o's with 0's // no NAs ? continue to check rest of isorted - the same routine as usual if (nalast == 0) { for (int k = 0; k != n; ++k) { if (x[k] != NA_INTEGER) j++; } // INCLUDED ?? if (j == 0) { push(n); return (-2); } if (j != n) return (0); } if (n <= 1) { push(n); return (1); } if (icheck(x[1]) < icheck(x[0])) { i = 2; while (i < n && icheck(x[i]) < icheck(x[i - 1])) i++; // strictly opposite to expected 'order', no ties; if (i == n) { mpush(1, n); return (-1); } // e.g. no more than one NA at the beginning/end (for order=-1/1) else return (0); } int old = gsngrp[flip]; int tt = 1; for (int i = 1; i != n; ++i) { if (icheck(x[i]) < icheck(x[i - 1])) { gsngrp[flip] = old; return (0); } if (x[i] == x[i - 1]) tt++; else { push(tt); tt = 1; } } push(tt); // same as 'order', NAs at the beginning for order=1, at end for // order=-1, possibly with ties return(1); } // order=1 is ascending and -1 is descending // also accounts for nalast=0 (=NA), =1 (TRUE), -1 (FALSE) (in twiddle) static int dsorted(double *x, int n) { int i = 1, j = 0; unsigned long long prev, this; if (nalast == 0) { // when nalast = NA, // all NAs ? return special value to replace all o's values with '0' // any NAs ? return 0 = unsorted and leave it to sort routines to // replace o's with 0's // no NAs ? continue to check the rest of isorted - // the same routine as usual for (int k = 0; k != n; ++k) { if (!is_nan(x, k)) j++; } // INCLUDED ?? if (j == 0) { push(n); return (-2); } if (j != n) return (0); } if (n <= 1) { push(n); return (1); } prev = twiddle(x, 0, order); this = twiddle(x, 1, order); if (this < prev) { i = 2; prev = this; while (i < n && (this = twiddle(x, i, order)) < prev) { i++; prev = this; } if (i == n) { mpush(1, n); return (-1); } // strictly opposite of expected 'order', no ties; e.g. no // more than one NA at the beginning/end (for order=-1/1) // TO DO: improve to be stable for ties in reverse else return(0); } int old = gsngrp[flip]; int tt = 1; for (int i = 1; i != n; ++i) { // TO DO: once we get past -Inf, NA and NaN at the bottom, and // +Inf at the top, the middle only need be twiddled // for tolerance (worth it?) this = twiddle(x, i, order); if (this < prev) { gsngrp[flip] = old; return (0); } if (this == prev) tt++; else { push(tt); tt = 1; } prev = this; } push(tt); // exactly as expected in 'order' (1=increasing, -1=decreasing), // possibly with ties return (1); } // order=1 is ascending and -1 is descending // also accounts for nalast=0 (=NA), =1 (TRUE), -1 (FALSE) static int csorted(SEXP *x, int n) { int i = 1, j = 0, tmp; if (nalast == 0) { // when nalast = NA, // all NAs ? return special value to replace all o's values with '0' // any NAs ? return 0 = unsorted and leave it to sort routines // to replace o's with 0's // no NAs ? continue to check the rest of isorted - // the same routine as usual for (int k = 0; k != n; ++k) { if (x[k] != NA_STRING) j++; } // INCLUDED ?? if (j == 0) { push(n); return (-2); } if (j != n) return (0); } if (n <= 1) { push(n); return (1); } if (StrCmp2(x[1], x[0]) < 0) { i = 2; while (i < n && StrCmp2(x[i], x[i - 1]) < 0) i++; if (i == n) { mpush(1, n); return (-1); } // strictly opposite of expected 'order', no ties; // e.g. no more than one NA at the beginning/end (for order=-1/1) else return (0); } int old = gsngrp[flip]; int tt = 1; for (int i = 1; i != n; ++i) { tmp = StrCmp2(x[i], x[i - 1]); if (tmp < 0) { gsngrp[flip] = old; return (0); } if (tmp == 0) tt++; else { push(tt); tt = 1; } } push(tt); // exactly as expected in 'order', possibly with ties return (1); } static void isort(int *x, int *o, int n) { if (n <= 2) { // nalast = 0 and n == 2 (check bottom of this file for explanation) if (nalast == 0 && n == 2) { if (o[0] == -1) { o[0] = 1; o[1] = 2; } for (int i = 0; i != n; ++i) { if (x[i] == NA_INTEGER) o[i] = 0; } // INCLUDED ?? push(1); push(1); return; } else Error("Internal error: isort received n=%d. isorted should have dealt with this (e.g. as a reverse sorted vector) already",n); } if (n < N_SMALL && o[0] != -1 && nalast != 0) { // see comment above in iradix_r on N_SMALL=200. /* if not o[0] then can't just populate with 1:n here, since x is changed by ref too (so would need to be copied). */ /* pushes inside too. Changes x and o by reference, so not suitable in first arg when o hasn't been populated yet and x is an actual argument (hence check on o[0]). */ if (order != 1 || nalast != -1) // so that default case, i.e., order=1, nalast=FALSE will // not be affected (ex: `setkey`) for (int i = 0; i != n; ++i) x[i] = icheck(x[i]); iinsert(x, o, n); } else { /* Tighter range (e.g. copes better with a few abormally large values in some groups), but also, when setRange was once at arg level that caused an extra scan of (long) x first. 10,000 calls to setRange takes just 0.04s i.e. negligible. */ setRange(x, n); if (range == NA_INTEGER) Error("Internal error: isort passed all-NA. isorted should have caught this before this point"); int *target = (o[0] != -1) ? newo : o; // was range < 10000 for subgroups, but 1e5 for the first // arg, tried to generalise here. 1e4 rather than 1e5 here // because iterated was (thisgrpn < 200 || range > 20000) then // radix a short vector with large range can bite icount when // iterated (BLOCK 4 and 6) if (range <= N_RANGE && range <= n) { icount(x, target, n); } else { iradix(x, target, n); } } } static void dsort(double *x, int *o, int n) { if (n <= 2) { if (nalast == 0 && n == 2) { // don't have to twiddle here.. at least one will be NA // and 'n' WILL BE 2. if (o[0] == -1) { o[0] = 1; o[1] = 2; } for (int i = 0; i != n; ++i) { if (is_nan(x, i)) o[i] = 0; } // INCLUDED ?? push(1); push(1); return; } Error("Internal error: dsort received n=%d. dsorted should have dealt with this (e.g. as a reverse sorted vector) already",n); } if (n < N_SMALL && o[0] != -1 && nalast != 0) { // see comment above in iradix_r re N_SMALL=200, and isort for o[0] for (int i = 0; i != n; ++i) ((unsigned long long *)x)[i] = twiddle(x, i, order); // have to twiddle here anyways, can't speed up default case // like in isort dinsert((unsigned long long *)x, o, n); } else { dradix((unsigned char *) x, (o[0] != -1) ? newo : o, n); } } /* // SEXP attribute_hidden DT_radixsort(SEXP args) SEXP DT_radixsort(SEXP args) { int n = -1, narg = 0, ngrp, tmp, *osub, thisgrpn; R_xlen_t nl = n; Rboolean isSorted = TRUE, retGrp; void *xd; int *o = NULL; // ML: FIXME: Here are just two of the dangerous assumptions here if (sizeof(int) != 4) { error("radix sort assumes sizeof(int) == 4"); } if (sizeof(double) != 8) { error("radix sort assumes sizeof(double) == 8"); } nalast = (asLogical(CAR(args)) == NA_LOGICAL) ? 0 : (asLogical(CAR(args)) == TRUE) ? 1 : -1; // 1=TRUE, -1=FALSE, 0=NA args = CDR(args); SEXP decreasing = CAR(args); args = CDR(args); // If TRUE, return starts of runs of identical values + max group size. retGrp = asLogical(CAR(args)); args = CDR(args); // If FALSE, get order of strings in appearance order. Essentially // abuses the CHARSXP table to group strings without hashing // them. Only makes sense when retGrp=TRUE. sortStr = asLogical(CAR(args)); args = CDR(args); */ SEXP Cradixsort(SEXP NA_last, SEXP decreasing, SEXP RETstrt, SEXP RETgs, SEXP SORTStr, SEXP args) { int n = -1, narg = 0, ngrp, tmp, *osub, thisgrpn; R_xlen_t nl = n; Rboolean isSorted = TRUE, retGrp, retStarts; void *xd; int *o = NULL; // ML: FIXME: Here are just two of the dangerous assumptions here if (sizeof(int) != 4) { error("radix sort assumes sizeof(int) == 4"); } if (sizeof(double) != 8) { error("radix sort assumes sizeof(double) == 8"); } nalast = (asLogical(NA_last) == NA_LOGICAL) ? 0 : (asLogical(NA_last) == TRUE) ? 1 : -1; // 1=TRUE, -1=FALSE, 0=NA retStarts = asLogical(RETstrt); retGrp = retStarts || asLogical(RETgs); sortStr = asLogical(SORTStr); /* When grouping, we round off doubles to account for imprecision */ setNumericRounding(0); // before: retGrp ? 2 : 0 if (args == R_NilValue) return R_NilValue; if (isVector(CAR(args))) nl = XLENGTH(CAR(args)); for (SEXP ap = args; ap != R_NilValue; ap = CDR(ap), narg++) { if (!isVector(CAR(ap))) error("argument %d is not a vector", narg + 1); //Rprintf("%d, %d\n", XLENGTH(CAR(ap)), nl); if (XLENGTH(CAR(ap)) != nl) error("argument lengths differ"); } if (narg != length(decreasing)) error("length(decreasing) must match the number of order arguments"); for (int i = 0; i != narg; ++i) { if (LOGICAL(decreasing)[i] == NA_LOGICAL) error("'decreasing' elements must be TRUE or FALSE"); } order = asLogical(decreasing) ? -1 : 1; SEXP x = CAR(args); args = CDR(args); // (ML) FIXME: need to support long vectors if (nl > INT_MAX) { error("long vectors not supported"); } n = (int) nl; // upper limit for stack size (all size 1 groups). We'll detect // and avoid that limit, but if just one non-1 group (say 2), that // can't be avoided. gsmaxalloc = n; // once for the result, needs to be length n. // TO DO: save allocation if NULL is returned (isSorted = =TRUE) so // [i|c|d]sort know they can populate o directly with no working // memory needed to reorder existing order had to repace this from // '0' to '-1' because 'nalast = 0' replace 'o[.]' with 0 values. SEXP ans = PROTECT(allocVector(INTSXP, n)); o = INTEGER(ans); if (n > 0) o[0] = -1; xd = DATAPTR(x); stackgrps = narg > 1 || retGrp; if (TYPEOF(x) == STRSXP) { checkEncodings(x); } savetl_init(); // from now on use Error not error. switch (TYPEOF(x)) { case INTSXP: case LGLSXP: tmp = isorted(xd, n); break; case REALSXP : twiddle = &dtwiddle; is_nan = &dnan; tmp = dsorted(xd, n); break; case STRSXP : tmp = csorted(xd, n); break; default : Error("First arg is type '%s', not yet supported", type2char(TYPEOF(x))); } if (tmp) { // -1 or 1. NEW: or -2 in case of nalast == 0 and all NAs if (tmp == 1) { // same as expected in 'order' (1 = increasing, -1 = decreasing) isSorted = TRUE; for (int i = 0; i != n; ++i) o[i] = i + 1; } else if (tmp == -1) { // -1 (or -n for result of strcmp), strictly opposite to // -expected 'order' isSorted = FALSE; for (int i = 0; i != n; ++i) o[i] = n - i; } else if (nalast == 0 && tmp == -2) { // happens only when nalast=NA/0. Means all NAs, replace // with 0's therefore! isSorted = FALSE; for (int i = 0; i != n; ++i) o[i] = 0; } } else { isSorted = FALSE; switch (TYPEOF(x)) { case INTSXP: case LGLSXP: isort(xd, o, n); break; case REALSXP : dsort(xd, o, n); break; case STRSXP : if (sortStr) { csort_pre(xd, n); alloc_csort_otmp(n); csort(xd, o, n); } else cgroup(xd, o, n); break; default: Error ("Internal error: previous default should have caught unsupported type"); } } int maxgrpn = gsmax[flip]; // biggest group in the first arg void *xsub = NULL; // local int (*f) (); void (*g) (); if (narg > 1 && gsngrp[flip] < n) { // double is the largest type, 8 xsub = (void *) malloc(maxgrpn * sizeof(double)); if (xsub == NULL) Error("Couldn't allocate xsub in do_radixsort, requested %d * %d bytes.", maxgrpn, sizeof(double)); // global variable, used by isort, dsort, sort and cgroup newo = (int *) malloc(maxgrpn * sizeof(int)); if (newo == NULL) Error("Couldn't allocate newo in do_radixsort, requested %d * %d bytes.", maxgrpn, sizeof(int)); } for (int col = 2; col <= narg; col++) { x = CAR(args); args = CDR(args); xd = DATAPTR(x); ngrp = gsngrp[flip]; if (ngrp == n && nalast != 0) break; flipflop(); stackgrps = col != narg || retGrp; order = LOGICAL(decreasing)[col - 1] ? -1 : 1; switch (TYPEOF(x)) { case INTSXP: case LGLSXP: f = &isorted; g = &isort; break; case REALSXP: twiddle = &dtwiddle; is_nan = &dnan; f = &dsorted; g = &dsort; break; case STRSXP: f = &csorted; if (sortStr) { csort_pre(xd, n); alloc_csort_otmp(gsmax[1 - flip]); g = &csort; } // no increasing/decreasing order required if sortStr = FALSE, // just a dummy argument else g = &cgroup; break; default: Error("Arg %d is type '%s', not yet supported", col, type2char(TYPEOF(x))); } int i = 0; for (int grp = 0; grp != ngrp; ++grp) { thisgrpn = gs[1 - flip][grp]; if (thisgrpn == 1) { if (nalast == 0) { // this edge case had to be taken care of // here.. (see the bottom of this file for // more explanation) switch (TYPEOF(x)) { case INTSXP: if (INTEGER(x)[o[i] - 1] == NA_INTEGER) { isSorted = FALSE; o[i] = 0; } break; case LGLSXP: if (LOGICAL(x)[o[i] - 1] == NA_LOGICAL) { isSorted = FALSE; o[i] = 0; } break; case REALSXP: if (ISNAN(REAL(x)[o[i] - 1])) { isSorted = FALSE; o[i] = 0; } break; case STRSXP: if (STRING_ELT(x, o[i] - 1) == NA_STRING) { isSorted = FALSE; o[i] = 0; } break; default : Error("Internal error: previous default should have caught unsupported type"); } } i++; push(1); continue; } osub = o+i; // ** TO DO **: if isSorted, we can just point xsub // into x directly. If (*f)() returns 0, // though, will have to copy x at that point // When doing this, xsub could be allocated at // that point for the first time. if (TYPEOF(x) == STRSXP) for (int j = 0; j != thisgrpn; ++j) ((SEXP *) xsub)[j] = ((SEXP *) xd)[o[i++] - 1]; else if (TYPEOF(x) == REALSXP) for (int j = 0; j != thisgrpn; ++j) ((double *) xsub)[j] = ((double *) xd)[o[i++] - 1]; else for (int j = 0; j != thisgrpn; ++j) ((int *) xsub)[j] = ((int *) xd)[o[i++] - 1]; // continue; // BASELINE short circuit timing // point. Up to here is the cost of creating xsub. // [i|d|c]sorted(); very low cost, sequential tmp = (*f)(xsub, thisgrpn); if (tmp) { // *sorted will have already push()'d the groups if (tmp == -1) { isSorted = FALSE; for (int k = 0; k < thisgrpn / 2; k++) { // reverse the order in-place using no // function call or working memory // isorted only returns -1 for // _strictly_ decreasing order, // otherwise ties wouldn't be stable tmp = osub[k]; osub[k] = osub[thisgrpn - 1 - k]; osub[thisgrpn - 1 - k] = tmp; } } else if (nalast == 0 && tmp == -2) { // all NAs, replace osub[.] with 0s. isSorted = FALSE; for (int k = 0; k != thisgrpn; ++k) osub[k] = 0; } continue; } isSorted = FALSE; // nalast=NA will result in newo[0] = 0. So had to change to -1. newo[0] = -1; // may update osub directly, or if not will put the // result in global newo (*g)(xsub, osub, thisgrpn); if (newo[0] != -1) { if (nalast != 0) for (int j = 0; j != thisgrpn; ++j) // reuse xsub to reorder osub ((int *) xsub)[j] = osub[newo[j] - 1]; else for (int j = 0; j != thisgrpn; ++j) // final nalast case to handle! ((int *) xsub)[j] = (newo[j] == 0) ? 0 : osub[newo[j] - 1]; memcpy(osub, xsub, thisgrpn * sizeof(int)); } } } if (!sortStr && ustr_n != 0) Error("Internal error: at the end of do_radixsort sortStr == FALSE but ustr_n !=0 [%d]", ustr_n); for(int i = 0; i != ustr_n; ++i) SET_TRLEN(ustr[i], 0); maxlen = 1; // reset global. Minimum needed to count "" and NA ustr_n = 0; savetl_end(); free(ustr); ustr = NULL; ustr_alloc = 0; if (retGrp) { int maxgrpn = NA_INTEGER; ngrp = gsngrp[flip]; SEXP s_starts = retStarts ? install("starts") : install("group.sizes"); setAttrib(ans, s_starts, x = allocVector(INTSXP, ngrp)); int *px = INTEGER(x); // pointer -> http://adv-r.had.co.nz/C-interface.html if (retStarts && asLogical(RETgs)) { SEXP s_gs = install("group.sizes"); SEXP y; setAttrib(ans, s_gs, y = PROTECT(allocVector(INTSXP, ngrp))); // coerceVector(gs[flip], INTSXP)); Does not work, gs is integer array int *py = INTEGER(y); if (ngrp > 0) { int ngm1 = ngrp-1; px[0] = 1; py[ngm1] = gs[flip][ngm1]; for (int i = 0; i != ngm1; ++i) { py[i] = gs[flip][i]; px[i + 1] = px[i] + py[i]; } maxgrpn = gsmax[flip]; } UNPROTECT(1); // unprotects y !! } else if(retStarts) { if (ngrp > 0) { int ngm1 = ngrp-1; px[0] = 1; for (int i = 0; i != ngm1; ++i) { px[i + 1] = px[i] + gs[flip][i]; } maxgrpn = gsmax[flip]; } } else { if (ngrp > 0) { for (int i = 0; i != ngrp; ++i) { px[i] = gs[flip][i]; } maxgrpn = gsmax[flip]; } } SEXP s_maxgrpn = install("maxgrpn"); setAttrib(ans, s_maxgrpn, ScalarInteger(maxgrpn)); // Attribute indicating whether the vector was sorted !! // SEXP s_sorted = install("sorted"); // setAttrib(ans, s_sorted, ScalarLogical(isSorted)); // SEXP nms; // PROTECT(nms = allocVector(STRSXP, 2)); // SET_STRING_ELT(nms, 0, mkChar("grouping")); // SET_STRING_ELT(nms, 1, mkChar("integer")); // setAttrib(ans, R_ClassSymbol, nms); // UNPROTECT(1); } // Attribute indicating whether the vector was sorted !! -> always attach SEXP s_sorted = install("sorted"); setAttrib(ans, s_sorted, ScalarLogical(isSorted)); Rboolean dropZeros = !retGrp && !isSorted && nalast == 0; if (dropZeros) { int zeros = 0; for (int i = 0; i != n; ++i) { if (o[i] == 0) zeros++; } if (zeros > 0) { PROTECT(ans = allocVector(INTSXP, n - zeros)); int *o2 = INTEGER(ans); for (int i = 0, i2 = 0; i != n; ++i) { if (o[i] > 0) o2[i2++] = o[i]; } UNPROTECT(1); } } gsfree(); free(radix_xsub); radix_xsub=NULL; radix_xsuballoc=0; free(xsub); free(newo); xsub=newo=NULL; free(xtmp); xtmp=NULL; xtmp_alloc=0; free(otmp); otmp=NULL; otmp_alloc=0; free(csort_otmp); csort_otmp=NULL; csort_otmp_alloc=0; free(cradix_counts); cradix_counts=NULL; cradix_counts_alloc=0; free(cradix_xtmp); cradix_xtmp=NULL; cradix_xtmp_alloc=0; // TO DO: use xtmp already got UNPROTECT(1); return ans; } void Cdoubleradixsort(int *o, Rboolean NA_last, Rboolean decreasing, SEXP x) { int n = -1, tmp; R_xlen_t nl = n; void *xd; nalast = (NA_last) ? 1 : -1; // 1=TRUE, -1=FALSE /* When grouping, we round off doubles to account for imprecision */ setNumericRounding(0); if(!isVector(x)) error("x is not a vector"); nl = XLENGTH(x); order = (decreasing) ? -1 : 1; // (ML) FIXME: need to support long vectors if (nl > INT_MAX) error("long vectors not supported"); n = (int) nl; // if (n != LENGTH(o)) error("lengths of all arguments must match"); Cannot get length from pointer to first element!! // upper limit for stack size (all size 1 groups). We'll detect // and avoid that limit, but if just one non-1 group (say 2), that // can't be avoided. gsmaxalloc = n; // once for the result, needs to be length n. // TO DO: save allocation if NULL is returned (isSorted = =TRUE) so // [i|c|d]sort know they can populate o directly with no working // memory needed to reorder existing order had to repace this from // '0' to '-1' because 'nalast = 0' replace 'o[.]' with 0 values. if (n > 0) o[0] = -1; xd = DATAPTR(x); stackgrps = FALSE; // savetl_init(); // from now on use Error not error. twiddle = &dtwiddle; is_nan = &dnan; tmp = dsorted(xd, n); if (tmp) { // -1 or 1. if (tmp == 1) { // same as expected in 'order' (1 = increasing, -1 = decreasing) for (int i = 0; i != n; ++i) o[i] = i + 1; } else if (tmp == -1) { // -1 (or -n for result of strcmp), strictly opposite to -expected 'order' for (int i = 0; i != n; ++i) o[i] = n - i; } } else { dsort(xd, o, n); } // dsort(xd, o, n); maxlen = 1; // reset global. Minimum needed to count "" and NA //savetl_end(); // gsfree(); // ok?? free(radix_xsub); radix_xsub=NULL; radix_xsuballoc=0; // needed in dradix !! free(newo); newo=NULL; // also needed !! free(xtmp); xtmp=NULL; xtmp_alloc=0; // needed !! free(otmp); otmp=NULL; otmp_alloc=0; // needed !! // TO DO: use xtmp already got } collapse/src/fmean.cpp0000644000176200001440000006324114174223734014452 0ustar liggesusers#include using namespace Rcpp; // [[Rcpp::export]] NumericVector fmeanCpp(const NumericVector& x, int ng = 0, const IntegerVector& g = 0, const SEXP& gs = R_NilValue, const SEXP& w = R_NilValue, bool narm = true) { int l = x.size(); if(l < 1) return x; // Prevents seqfault for numeric(0) #101 if (Rf_isNull(w)) { // No weights if (ng == 0) { if(narm) { int j = l-1, n = 1; // 1 because for-loop starts from 2 // long double sum = x[j]; double sum = x[j]; while(std::isnan(sum) && j!=0) sum = x[--j]; if(j != 0) for(int i = j; i--; ) { if(std::isnan(x[i])) continue; sum += x[i]; // Fastest ? ++n; } sum = sum/n; return Rf_ScalarReal(sum); // :create((double)sum) } else { // long double sum = 0; double sum = 0; for(int i = 0; i != l; ++i) { if(std::isnan(x[i])) { sum = x[i]; break; } else { sum += x[i]; } } sum = sum/l; return Rf_ScalarReal(sum); // :create((double)sum) } } else { // with groups if(g.size() != l) stop("length(g) must match nrow(X)"); if(narm) { NumericVector sum(ng, NA_REAL); // Other way ? IntegerVector n(ng, 1); // could also do no_init_vector and then add n[g[i]-1] = 1 in fir if condition... -> Nope, that is slower for(int i = l; i--; ) { if(!std::isnan(x[i])) { // faster way to code this ? -> Not Bad at all -> index for g[i]-1? -> Nope, no noticeable improvement if(std::isnan(sum[g[i]-1])) sum[g[i]-1] = x[i]; else { sum[g[i]-1] += x[i]; ++n[g[i]-1]; } } } for(int i = ng; i--; ) sum[i] /= n[i]; // if(n[i] == 0) stop("Group size of 0 encountered. This is probably due to unused factor levels. Use fdroplevels(f) to drop them."); -> No check possible when initializing at 1 // SHALLOW_DUPLICATE_ATTRIB(sum, x); // Rf_copyMostAttrib(x, sum); if(!Rf_isObject(x)) Rf_copyMostAttrib(x, sum); // best ! // SHALLOW_DUPLICATE_ATTRIB(sum, x); // Rf_setAttrib(sum, R_NamesSymbol, R_NilValue); // } return sum; } else { NumericVector sum(ng); // no_init_vector // good? -> yes, but not initializing is numerically unstable.. int ngs = 0; if(Rf_isNull(gs)) { IntegerVector gsv(ng); if(gsv.size() != ng) stop("Vector of group-sizes must match number of groups"); for(int i = 0; i != l; ++i) { if(std::isnan(x[i])) { if(!std::isnan(sum[g[i]-1])) { sum[g[i]-1] = x[i]; ++ngs; if(ngs == ng) break; } } else { sum[g[i]-1] += x[i]; ++gsv[g[i]-1]; } } for(int i = ng; i--; ) sum[i] /= gsv[i]; // Adding n takes twice as long, } else { IntegerVector gsv = gs; if(gsv.size() != ng) stop("Vector of group-sizes must match number of groups"); for(int i = 0; i != l; ++i) { if(std::isnan(x[i])) { if(!std::isnan(sum[g[i]-1])) { sum[g[i]-1] = x[i]; ++ngs; if(ngs == ng) break; } } else { sum[g[i]-1] += x[i]; } } for(int i = ng; i--; ) { if(gsv[i] == 0) stop("Group size of 0 encountered. This is probably due to unused factor levels. Use fdroplevels(f) to drop them."); sum[i] /= gsv[i]; // This is good because adding n takes twice as long, if factor, supply gs = tabulate(f,nlevels(f)) } } if(!Rf_isObject(x)) Rf_copyMostAttrib(x, sum); // SHALLOW_DUPLICATE_ATTRIB(sum, x); return sum; } } } else { // With weights NumericVector wg = w; // wg(w) Identical speed if(l != wg.size()) stop("length(w) must match length(x)"); if (ng == 0) { if(narm) { int j = l-1; // 1 because for-loop starts from 2 while((std::isnan(x[j]) || std::isnan(wg[j])) && j!=0) --j; // This does not make a difference in performance but is more parsimonious. // long double sum = x[j]*wg[j], sumw = wg[j]; double sum = x[j]*wg[j], sumw = wg[j]; if(j != 0) for(int i = j; i--; ) { if(std::isnan(x[i]) || std::isnan(wg[i])) continue; sum += x[i]*wg[i]; // Fastest ?? sumw += wg[i]; } sum = sum/sumw; return Rf_ScalarReal(sum); // :create((double)sum) } else { // long double sum = 0, sumw = 0; double sum = 0, sumw = 0; for(int i = 0; i != l; ++i) { if(std::isnan(x[i]) || std::isnan(wg[i])) { // good, check both ? -> yes sum = x[i]+wg[i]; break; } else { sum += x[i]*wg[i]; sumw += wg[i]; } } sum = sum/sumw; return Rf_ScalarReal(sum); // :create((double)sum) } } else { // with groups if(g.size() != l) stop("length(g) must match nrow(X)"); if(narm) { NumericVector sum(ng, NA_REAL); // Other way ? -> Nope, this is as good as it gets NumericVector sumw(ng); // = no_init_vector(ng); // no init works!! but gives valgrind issue for(int i = l; i--; ) { if(std::isnan(x[i]) || std::isnan(wg[i])) continue; // faster way to code this ? -> Not Bad at all -> index for g[i]-1? -> Nope, no noticeable improvement if(std::isnan(sum[g[i]-1])) { sum[g[i]-1] = x[i]*wg[i]; sumw[g[i]-1] = wg[i]; } else { sum[g[i]-1] += x[i]*wg[i]; sumw[g[i]-1] += wg[i]; } } sum = sum/sumw; // good ? better return sum/sumw? -> Nope, slower if(!Rf_isObject(x)) Rf_copyMostAttrib(x, sum); // SHALLOW_DUPLICATE_ATTRIB(sum, x); return sum; } else { NumericVector sum(ng), sumw(ng); // good? -> yes ! // = no_init_vector// Not initializing numerically unstable int ngs = 0; for(int i = 0; i != l; ++i) { if(std::isnan(x[i]) || std::isnan(wg[i])) { if(!std::isnan(sum[g[i]-1])) { sum[g[i]-1] = sumw[g[i]-1] = x[i]+wg[i]; // or NA_REAL ? -> Nope, good ++ngs; if(ngs == ng) break; } } else { sum[g[i]-1] += x[i]*wg[i]; sumw[g[i]-1] += wg[i]; } } sum = sum/sumw; if(!Rf_isObject(x)) Rf_copyMostAttrib(x, sum); // SHALLOW_DUPLICATE_ATTRIB(sum, x); return sum; } } } } // [[Rcpp::export]] SEXP fmeanmCpp(const NumericMatrix& x, int ng = 0, const IntegerVector& g = 0, const SEXP& gs = R_NilValue, const SEXP& w = R_NilValue, bool narm = true, bool drop = true) { int l = x.nrow(), col = x.ncol(); if(l < 1) return x; // Prevents seqfault for numeric(0) #101 if(Rf_isNull(w)) { // No weights if(ng == 0) { NumericVector sum = no_init_vector(col); // Initialize faster -> Nope if(narm) { for(int j = col; j--; ) { // Instead Am(j,_) you can use Am.row(j). NumericMatrix::ConstColumn column = x( _ , j); int k = l-1, nj = 1; // long double sumj = column[k]; double sumj = column[k]; while(std::isnan(sumj) && k!=0) sumj = column[--k]; if(k != 0) for(int i = k; i--; ) { if(std::isnan(column[i])) continue; sumj += column[i]; ++nj; } sumj = sumj/nj; sum[j] = sumj; // (double)sumj; } } else { for(int j = col; j--; ) { NumericMatrix::ConstColumn column = x( _ , j); // long double sumj = 0; double sumj = 0; for(int i = 0; i != l; ++i) { if(std::isnan(column[i])) { sumj = column[i]; break; } else { sumj += column[i]; } } sumj = sumj/l; sum[j] = sumj; // (double)sumj; } } if(drop) Rf_setAttrib(sum, R_NamesSymbol, colnames(x)); else { Rf_dimgets(sum, Dimension(1, col)); // sum.attr("dimnames") = List::create(R_NilValue,colnames(x)); colnames(sum) = colnames(x); // NEW! faster than R ? -> yes, good if(!Rf_isObject(x)) Rf_copyMostAttrib(x, sum); } return sum; } else { // with groups if(g.size() != l) stop("length(g) must match nrow(X)"); if(narm) { NumericMatrix sum = no_init_matrix(ng, col); std::fill(sum.begin(), sum.end(), NA_REAL); // fastest ? or create vector and declare as matrix ? // NumericVector sumt(ng*col, NA_REAL); // A tiny speed gain, but not much !! Same memory efficiency // sumt.attr("dim") = Dimension(ng, col); // NumericMatrix sum = as(sumt); IntegerVector nj(ng); // = no_init_vector(ng); // better for valgrind for(int j = col; j--; ) { NumericMatrix::ConstColumn column = x( _ , j); NumericMatrix::Column sumj = sum( _ , j); // int nj[ng]; // Numerically stable and faster and more memory efficient than before for(int i = l; i--; ) { if(!std::isnan(column[i])) { if(std::isnan(sumj[g[i]-1])) { sumj[g[i]-1] = column[i]; nj[g[i]-1] = 1; } else { sumj[g[i]-1] += column[i]; ++nj[g[i]-1]; } } } for(int i = ng; i--; ) sumj[i] /= nj[i]; // if(gsv[i] == 0) stop("Group size of 0 encountered. This is probably due to unused factor levels. Use fdroplevels(f) to drop them."); cant check when not initializing } colnames(sum) = colnames(x); // efficient if(!Rf_isObject(x)) Rf_copyMostAttrib(x, sum); return sum; } else { NumericMatrix sum(ng, col); // no init numerically unstable if(Rf_isNull(gs)) { // int gsv[ng], memsize = sizeof(int)*ng; for(int j = col; j--; ) { NumericMatrix::ConstColumn column = x( _ , j); NumericMatrix::Column sumj = sum( _ , j); // memset(gsv, 0, memsize); // still a tiny bit faster than std::vector, but both have the same memory efficiency std::vector gsv(ng); int ngs = 0; for(int i = 0; i != l; ++i) { if(std::isnan(column[i])) { if(!std::isnan(sumj[g[i]-1])) { sumj[g[i]-1] = column[i]; ++ngs; if(ngs == ng) break; } } else { sumj[g[i]-1] += column[i]; ++gsv[g[i]-1]; } } for(int i = ng; i--; ) sumj[i] /= gsv[i]; } } else { IntegerVector gsv = gs; if(gsv.size() != ng) stop("Vector of group-sizes must match number of groups"); for(int j = col; j--; ) { NumericMatrix::ConstColumn column = x( _ , j); NumericMatrix::Column sumj = sum( _ , j); int ngs = 0; for(int i = 0; i != l; ++i) { if(std::isnan(column[i])) { if(!std::isnan(sumj[g[i]-1])) { sumj[g[i]-1] = column[i]; ++ngs; if(ngs == ng) break; } } else { sumj[g[i]-1] += column[i]; } } for(int i = ng; i--; ) { if(gsv[i] == 0) stop("Group size of 0 encountered. This is probably due to unused factor levels. Use fdroplevels(f) to drop them."); sumj[i] /= gsv[i]; } } } colnames(sum) = colnames(x); // quite efficient if(!Rf_isObject(x)) Rf_copyMostAttrib(x, sum); return sum; } } } else { // With weights NumericVector wg = w; if(l != wg.size()) stop("length(w) must match nrow(X)"); if(ng == 0) { NumericVector sum = no_init_vector(col); // Initialize faster -> Nope if(narm) { for(int j = col; j--; ) { // Instead Am(j,_) you can use Am.row(j). NumericMatrix::ConstColumn column = x( _ , j); int k = l-1; while((std::isnan(column[k]) || std::isnan(wg[k])) && k!=0) --k; // long double sumj = column[k]*wg[k], sumwj = wg[k]; double sumj = column[k]*wg[k], sumwj = wg[k]; if(k != 0) for(int i = k; i--; ) { if(std::isnan(column[i]) || std::isnan(wg[i])) continue; sumj += column[i]*wg[i]; sumwj += wg[i]; } sumj = sumj/sumwj; sum[j] = sumj; // (double)sumj; } } else { for(int j = col; j--; ) { NumericMatrix::ConstColumn column = x( _ , j); // long double sumj = 0, sumwj = 0; double sumj = 0, sumwj = 0; for(int i = 0; i != l; ++i) { if(std::isnan(column[i]) || std::isnan(wg[i])) { sumj = column[i]+wg[i]; break; } else { sumj += column[i]*wg[i]; sumwj += wg[i]; } } sumj = sumj/sumwj; sum[j] = sumj; // (double)sumj; } } if(drop) Rf_setAttrib(sum, R_NamesSymbol, colnames(x)); // sum.attr("names") = colnames(x); else { Rf_dimgets(sum, Dimension(1, col)); // sum.attr("dimnames") = List::create(R_NilValue,colnames(x)); colnames(sum) = colnames(x); // NEW! faster than R ? -> yes, good if(!Rf_isObject(x)) Rf_copyMostAttrib(x, sum); } return sum; } else { // with groups if(g.size() != l) stop("length(g) must match nrow(X)"); if(narm) { NumericMatrix sum = no_init_matrix(ng, col); std::fill(sum.begin(), sum.end(), NA_REAL); // NumericMatrix sumw = no_init_matrix(ng, col); // Numerically stable ? -> Yes NumericVector sumwj(ng); // = no_init_vector(ng); for(int j = col; j--; ) { NumericMatrix::ConstColumn column = x( _ , j); NumericMatrix::Column sumj = sum( _ , j); // NumericVector sumwj = no_init_vector(ng); // NumericMatrix::Column sumwj = sumw( _ , j); // double sumwj[ng]; // Numerically stable, Slightly faster and a lot more memory efficient (but long double is a lot slower) for(int i = l; i--; ) { if(std::isnan(column[i]) || std::isnan(wg[i])) continue; if(std::isnan(sumj[g[i]-1])) { sumj[g[i]-1] = column[i]*wg[i]; sumwj[g[i]-1] = wg[i]; } else { sumj[g[i]-1] += column[i]*wg[i]; sumwj[g[i]-1] += wg[i]; } } for(int i = ng; i--; ) sumj[i] /= sumwj[i]; // sumj = sumj/sumwj; // This gives error because sumj is matrix column ! } colnames(sum) = colnames(x); // quite efficient if(!Rf_isObject(x)) Rf_copyMostAttrib(x, sum); return sum; } else { NumericMatrix sum(ng, col); // no init numerically unstable // NumericMatrix sumw(ng, col); // also here ? -> Nope // double sumwj[ng]; // Also a bit faster and a lot more memory efficient // int memsize = sizeof(double)*ng; for(int j = col; j--; ) { NumericMatrix::ConstColumn column = x( _ , j); NumericMatrix::Column sumj = sum( _ , j); // NumericMatrix::Column sumwj = sumw( _ , j); std::vector sumwj(ng); // memset(sumwj, 0, memsize); int ngs = 0; for(int i = 0; i != l; ++i) { if(std::isnan(column[i]) || std::isnan(wg[i])) { if(!std::isnan(sumj[g[i]-1])) { sumj[g[i]-1] = sumwj[g[i]-1] = column[i]+wg[i]; // or NA_REAL ? -> Nope, good ++ngs; if(ngs == ng) break; } } else { sumj[g[i]-1] += column[i]*wg[i]; sumwj[g[i]-1] += wg[i]; } } for(int i = ng; i--; ) sumj[i] /= sumwj[i]; // sumj = sumj/sumwj; // This gives erriir because sumj is matrix column } colnames(sum) = colnames(x); // quite efficient if(!Rf_isObject(x)) Rf_copyMostAttrib(x, sum); return sum; } } } } // [[Rcpp::export]] SEXP fmeanlCpp(const List& x, int ng = 0, const IntegerVector& g = 0, const SEXP& gs = R_NilValue, const SEXP& w = R_NilValue, bool narm = true, bool drop = true) { int l = x.size(); if(Rf_length(x[0]) == 0) stop("empty dataset."); if(Rf_isNull(w)) { // No weights if(ng == 0) { NumericVector sum(l); // not initializing not faster WIth NWDI (35 instead of 32 milliseconds) if(narm) { for(int j = l; j--; ) { NumericVector column = x[j]; int k = column.size()-1, ni = 1; // long double sumi = column[k]; // long double gives 45 instead of 35 milliseconds double sumi = column[k]; while(std::isnan(sumi) && k!=0) sumi = column[--k]; if(k != 0) for(int i = k; i--; ) { if(std::isnan(column[i])) continue; sumi += column[i]; ++ni; } sumi = sumi/ni; sum[j] = sumi; // (double)sumi; } } else { for(int j = l; j--; ) { NumericVector column = x[j]; // long double sumi = 0; double sumi = 0; int row = column.size(); for(int i = 0; i != row; ++i) { if(std::isnan(column[i])) { sumi = column[i]; break; } else { sumi += column[i]; } } sumi = sumi/row; sum[j] = sumi; // (double)sumi; } } if(drop) { Rf_setAttrib(sum, R_NamesSymbol, Rf_getAttrib(x, R_NamesSymbol)); return sum; } else { List out(l); for(int j = l; j--; ) { out[j] = sum[j]; SHALLOW_DUPLICATE_ATTRIB(out[j], x[j]); } SHALLOW_DUPLICATE_ATTRIB(out, x); Rf_setAttrib(out, R_RowNamesSymbol, Rf_ScalarInteger(1)); // out.attr("row.names") = 1; return out; } } else { // With groups List sum(l); int gss = g.size(); if(narm) { for(int j = l; j--; ) { NumericVector column = x[j]; if(gss != column.size()) stop("length(g) must match nrow(X)"); NumericVector sumj(ng, NA_REAL); // IntegerVector nj(ng, 1); // faster than no_init_vector ? -> good, cannot divide by interger 0, also else numerically unstable and no speed loss std::vector nj(ng, 1); // better memory allocation, and nearly same speed as integer array -> doesn't work because sets all byte to 1 -> https://stackoverflow.com/questions/14761015/memset-an-array-to-1 for(int i = gss; i--; ) { if(!std::isnan(column[i])) { // faster way to code this ? -> Not Bad at all, 54.. millisec for WDIM if(std::isnan(sumj[g[i]-1])) sumj[g[i]-1] = column[i]; else { sumj[g[i]-1] += column[i]; ++nj[g[i]-1]; } } } for(int i = ng; i--; ) sumj[i] /= nj[i]; SHALLOW_DUPLICATE_ATTRIB(sumj, column); sum[j] = sumj; } } else { if(Rf_isNull(gs)) { // int gsv[ng], memsize = sizeof(int)*ng; for(int j = l; j--; ) { NumericVector column = x[j]; if(gss != column.size()) stop("length(g) must match nrow(X)"); NumericVector sumj(ng); // = no_init_vector // Not initializing seems to be numerically unstable std::vector gsv(ng); // memset(gsv, 0, memsize); int ngs = 0; for(int i = 0; i != gss; ++i) { if(std::isnan(column[i])) { if(!std::isnan(sumj[g[i]-1])) { sumj[g[i]-1] = column[i]; ++ngs; if(ngs == ng) break; } } else { sumj[g[i]-1] += column[i]; ++gsv[g[i]-1]; } } for(int i = ng; i--; ) sumj[i] /= gsv[i]; SHALLOW_DUPLICATE_ATTRIB(sumj, column); sum[j] = sumj; } } else { IntegerVector gsv = gs; if(gsv.size() != ng) stop("Vector of group-sizes must match number of groups"); for(int j = l; j--; ) { NumericVector column = x[j]; if(gss != column.size()) stop("length(g) must match nrow(X)"); NumericVector sumj(ng); // = no_init_vector // Not initializing seems to be numerically unstable int ngs = 0; for(int i = 0; i != gss; ++i) { if(std::isnan(column[i])) { if(!std::isnan(sumj[g[i]-1])) { sumj[g[i]-1] = column[i]; ++ngs; if(ngs == ng) break; } } else { sumj[g[i]-1] += column[i]; } } for(int i = ng; i--; ) { if(gsv[i] == 0) stop("Group size of 0 encountered. This is probably due to unused factor levels. Use fdroplevels(f) to drop them."); sumj[i] /= gsv[i]; } SHALLOW_DUPLICATE_ATTRIB(sumj, column); sum[j] = sumj; } } } SHALLOW_DUPLICATE_ATTRIB(sum, x); Rf_setAttrib(sum, R_RowNamesSymbol, IntegerVector::create(NA_INTEGER, -ng)); return sum; } } else { // With weights NumericVector wg = w; // wg(w) // No speed loss ? -> Yes, and possibly even faster int wgs = wg.size(); if (ng == 0) { NumericVector sum(l); // not initializing not faster WIth NWDI (35 instead of 32 milliseconds) if(narm) { for(int j = l; j--; ) { NumericVector column = x[j]; if(column.size() != wgs) stop("length(w) must match nrow(X)"); // Really necessary ? int k = wgs-1; while((std::isnan(column[k]) || std::isnan(wg[k])) && k!=0) --k; // long double sumi = column[k]*wg[k], sumwi = wg[k]; double sumi = column[k]*wg[k], sumwi = wg[k]; if(k != 0) for(int i = k; i--; ) { if(std::isnan(column[i]) || std::isnan(wg[i])) continue; sumi += column[i]*wg[i]; sumwi += wg[i]; } sumi = sumi/sumwi; sum[j] = sumi; // (double)sumi; } } else { for(int j = l; j--; ) { NumericVector column = x[j]; if(column.size() != wgs) stop("length(w) must match nrow(X)"); // Really necessary ? // long double sumi = 0, sumwi = 0; double sumi = 0, sumwi = 0; for(int i = 0; i != wgs; ++i) { if(std::isnan(column[i]) || std::isnan(wg[i])) { sumi = column[i]+wg[i]; break; } else { sumi += column[i]*wg[i]; sumwi += wg[i]; } } sumi = sumi/sumwi; sum[j] = sumi; // (double)sumi; } } if(drop) { Rf_setAttrib(sum, R_NamesSymbol, Rf_getAttrib(x, R_NamesSymbol)); // sum.attr("names") = Rf_getAttrib(x, R_NamesSymbol); return sum; } else { List out(l); for(int j = l; j--; ) { out[j] = sum[j]; SHALLOW_DUPLICATE_ATTRIB(out[j], x[j]); } SHALLOW_DUPLICATE_ATTRIB(out, x); Rf_setAttrib(out, R_RowNamesSymbol, Rf_ScalarInteger(1)); // out.attr("row.names") = 1; return out; } } else { // With groups List sum(l); int gss = g.size(); if(wgs != gss) stop("length(w) must match length(g)"); if(narm) { NumericVector sumwj(ng); // = no_init_vector(ng); // stable and faster ? for(int j = l; j--; ) { NumericVector column = x[j]; if(gss != column.size()) stop("length(g) must match nrow(X)"); NumericVector sumj(ng, NA_REAL); // no_init_vector is faster and stable (you only divide by it every round) // double sumwj[ng]; for(int i = gss; i--; ) { if(std::isnan(column[i]) || std::isnan(wg[i])) continue; if(std::isnan(sumj[g[i]-1])) { sumj[g[i]-1] = column[i]*wg[i]; sumwj[g[i]-1] = wg[i]; } else { sumj[g[i]-1] += column[i]*wg[i]; sumwj[g[i]-1] += wg[i]; } } sumj = sumj/sumwj; // for(int i = ng; i--; ) sumj[i] /= sumwj[i]; SHALLOW_DUPLICATE_ATTRIB(sumj, column); sum[j] = sumj; } } else { // double sumwj[ng]; // int memsize = sizeof(double)*ng; for(int j = l; j--; ) { NumericVector column = x[j]; if(gss != column.size()) stop("length(g) must match nrow(X)"); NumericVector sumj(ng), sumwj(ng); // = no_init_vector // Not initializing seems to be numerically unstable // NumericVector sumwj(ng); // Also here not initializing is numerically unstable // memset(sumwj, 0, memsize); int ngs = 0; for(int i = 0; i != gss; ++i) { if(std::isnan(column[i]) || std::isnan(wg[i])) { if(!std::isnan(sumj[g[i]-1])) { sumj[g[i]-1] = sumwj[g[i]-1] = column[i]+wg[i]; // or NA_REAL ? -> Nope, good ++ngs; if(ngs == ng) break; } } else { sumj[g[i]-1] += column[i]*wg[i]; sumwj[g[i]-1] += wg[i]; } } sumj = sumj/sumwj; // for(int i = ng; i--; ) sumj[i] /= sumwj[i]; SHALLOW_DUPLICATE_ATTRIB(sumj, column); sum[j] = sumj; } } SHALLOW_DUPLICATE_ATTRIB(sum, x); Rf_setAttrib(sum, R_RowNamesSymbol, IntegerVector::create(NA_INTEGER, -ng)); return sum; } } } collapse/src/collapse_c.h0000644000176200001440000000061714166330702015126 0ustar liggesusers#include #include #define SEXPPTR(x) ((SEXP *)DATAPTR(x)) // to avoid overhead of looped VECTOR_ELT #define NISNAN(x) ((x) == (x)) // opposite of ISNAN for doubles // Faster than Rinternals version (which uses math library version) #undef ISNAN #define ISNAN(x) ((x) != (x)) void matCopyAttr(SEXP out, SEXP x, SEXP Rdrop, int ng); void DFcopyAttr(SEXP out, SEXP x, int ng); collapse/src/small_helper.c0000644000176200001440000011610714201327077015467 0ustar liggesusers#include "collapse_c.h" // #include "data.table.h" // #ifndef USE_RINTERNALS // #define USE_RINTERNALS // #endif void matCopyAttr(SEXP out, SEXP x, SEXP Rdrop, int ng) { SEXP dn = getAttrib(x, R_DimNamesSymbol); SEXP cn = isNull(dn) ? R_NilValue : VECTOR_ELT(dn, 1); // PROTECT ?? if(ng == 0 && asLogical(Rdrop)) { if(length(cn)) setAttrib(out, R_NamesSymbol, cn); } else { int nprotect = 1; SEXP dim = PROTECT(duplicate(getAttrib(x, R_DimSymbol))); INTEGER(dim)[0] = ng == 0 ? 1 : ng; dimgets(out, dim); if(length(cn)) { ++nprotect; SEXP dn = PROTECT(allocVector(VECSXP, 2)); SET_VECTOR_ELT(dn, 0, R_NilValue); SET_VECTOR_ELT(dn, 1, cn); dimnamesgets(out, dn); } if(!isObject(x)) copyMostAttrib(x, out); UNPROTECT(nprotect); } } void DFcopyAttr(SEXP out, SEXP x, int ng) { SHALLOW_DUPLICATE_ATTRIB(out, x); if(ng == 0) { setAttrib(out, R_RowNamesSymbol, ScalarInteger(1)); } else { SEXP rn = PROTECT(allocVector(INTSXP, 2)); // Needed here, now unsafe to pass uninitialized vectors to R_RowNamesSymbol. INTEGER(rn)[0] = NA_INTEGER; INTEGER(rn)[1] = -ng; setAttrib(out, R_RowNamesSymbol, rn); UNPROTECT(1); } } SEXP geteptr(SEXP x) { return R_ExternalPtrProtected(x); } // Faster than rep_len(value, n) and slightly faster than matrix(value, n) (which in turn is faster than rep_len)... SEXP falloc(SEXP value, SEXP n) { int l = asInteger(n), tval = TYPEOF(value); if(length(value) > 1) error("Must supply a single value to alloc()"); SEXP out = PROTECT(allocVector(tval, l)); switch(tval) { case INTSXP: case LGLSXP: { int val = asInteger(value), *pout = INTEGER(out); if(val == 0) memset(pout, 0, l*sizeof(int)); else for(int i = 0; i != l; ++i) pout[i] = val; break; } case REALSXP: { double val = asReal(value), *pout = REAL(out); if(val == 0.0) memset(pout, 0.0, l*sizeof(double)); else for(int i = 0; i != l; ++i) pout[i] = val; break; } case STRSXP: { SEXP val = asChar(value), *pout = STRING_PTR(out); for(int i = 0; i != l; ++i) pout[i] = val; break; } case VECSXP: { SEXP *pout = SEXPPTR(out); for(int i = 0; i != l; ++i) pout[i] = value; break; } default: error("Not supportd SEXP Type in alloc()"); } copyMostAttrib(value, out); UNPROTECT(1); return out; } SEXP groups2GRP(SEXP x, SEXP lx, SEXP gs) { int l = length(x); SEXP out = PROTECT(allocVector(INTSXP, asInteger(lx))); int *pout = INTEGER(out)-1, *pgs = INTEGER(gs); // SEXP *px = VECTOR_PTR(x); // -> Depreciated interface: https://github.com/hadley/r-internals/blob/ea892fa79bbffe961e78dbe9c90ce4ca3bf2d9bc/vectors.md // Matt Dowle Commented: // VECTOR_PTR does exist but returns 'not safe to return vector pointer' when USE_RINTERNALS is not defined. // VECTOR_DATA and LIST_POINTER exist too but call VECTOR_PTR. All are clearly not intended to be used by packages. // The concern is overhead inside VECTOR_ELT() biting when called repetitively in a loop like we do here. That's why // we take the R API (INTEGER()[i], REAL()[i], etc) outside loops for the simple types even when not parallel. For this // type list case (VECSXP) it might be that some items are ALTREP for example, so we really should use the heavier // _ELT accessor (VECTOR_ELT) inside the loop in this case. SEXP *px = SEXPPTR(x); for(int j = l; j--; ) { // This can go in any direction.. // SEXP column = VECTOR_ELT(x, j); int *pcolumn = INTEGER(px[j]), jp = j+1; for(int i = pgs[j]; i--; ) pout[pcolumn[i]] = jp; // This can go in any direction... } UNPROTECT(1); return out; } // Faster version of base R's spit based on grouping objects.. // TODO: Support DF's !! And check attribute preservation !! // -> works for factors, Date and POSIXct, but not for POSIXlt SEXP gsplit(SEXP x, SEXP gobj, SEXP toint) { if(TYPEOF(gobj) != VECSXP || !inherits(gobj, "GRP")) error("g needs to be an object of class 'GRP', see ?GRP"); const SEXP g = VECTOR_ELT(gobj, 1), gs = VECTOR_ELT(gobj, 2), ord = VECTOR_ELT(gobj, 5), order = VECTOR_ELT(gobj, 6); const int ng = length(gs), *pgs = INTEGER(gs), tx = TYPEOF(x), l = length(g); if(ng != INTEGER(VECTOR_ELT(gobj, 0))[0]) error("'GRP' object needs to have valid vector of group-sizes"); SEXP res = PROTECT(allocVector(VECSXP, ng)); // Output as integer or not if(asLogical(toint)) { for(int i = 0; i != ng; ++i) SET_VECTOR_ELT(res, i, allocVector(INTSXP, pgs[i])); } else { // Allocate split vectors and copy attributes and object bits SEXP x1 = PROTECT(allocVector(tx, 1)); copyMostAttrib(x, x1); SEXP ax = ATTRIB(x1); if(length(ax) == 1 && TAG(ax) == install("label")) ax = R_NilValue; int ox = OBJECT(x); // FAZIT: Need to use SET_VECTOR_ELT!! pres[i] = allocVector() doesn't work!! if(TYPEOF(ax) != NILSXP && ox != 0) { for(int i = 0, s4o = IS_S4_OBJECT(x); i != ng; ++i) { SEXP resi; SET_VECTOR_ELT(res, i, resi = allocVector(tx, pgs[i])); SET_ATTRIB(resi, ax); SET_OBJECT(resi, ox); if(s4o) SET_S4_OBJECT(resi); } } else if(TYPEOF(ax) != NILSXP) { for(int i = 0; i != ng; ++i) { SEXP resi; SET_VECTOR_ELT(res, i, resi = allocVector(tx, pgs[i])); // SET_ATTRIB(pres[i] = allocVector(tx, pgs[i]), ax); SET_ATTRIB(resi, ax); } } else if(ox != 0) { // Is this even possible? Object bits but no attributes? for(int i = 0, s4o = IS_S4_OBJECT(x); i != ng; ++i) { SEXP resi; SET_VECTOR_ELT(res, i, resi = allocVector(tx, pgs[i])); SET_OBJECT(resi, ox); if(s4o) SET_S4_OBJECT(resi); } } else { for(int i = 0; i != ng; ++i) SET_VECTOR_ELT(res, i, allocVector(tx, pgs[i])); } UNPROTECT(1); } SEXP *pres = SEXPPTR(res); // If grouping is sorted if(LOGICAL(ord)[1] == 1) { // This only works if data is already ordered in order of the groups int count = 0; if(asLogical(toint)) { for(int j = 0; j != ng; ++j) { int *pgj = INTEGER(pres[j]), gsj = pgs[j]; for(int i = 0; i != gsj; ++i) pgj[i] = ++count; } } else { if(length(x) != l) error("length(x) must match length(g)"); switch(tx) { case INTSXP: case LGLSXP: { const int *px = INTEGER(x); for(int j = 0; j != ng; ++j) { int *pgj = INTEGER(pres[j]), gsj = pgs[j]; for(int i = 0; i != gsj; ++i) pgj[i] = px[count++]; } break; } case REALSXP: { const double *px = REAL(x); for(int j = 0, gsj; j != ng; ++j) { double *pgj = REAL(pres[j]); gsj = pgs[j]; for(int i = 0; i != gsj; ++i) pgj[i] = px[count++]; } break; } case CPLXSXP: { const Rcomplex *px = COMPLEX(x); for(int j = 0, gsj; j != ng; ++j) { Rcomplex *pgj = COMPLEX(pres[j]); gsj = pgs[j]; for(int i = 0; i != gsj; ++i) pgj[i] = px[count++]; } break; } case STRSXP: { const SEXP *px = STRING_PTR(x); for(int j = 0, gsj; j != ng; ++j) { SEXP *pgj = STRING_PTR(pres[j]); gsj = pgs[j]; for(int i = 0; i != gsj; ++i) pgj[i] = px[count++]; } break; } case VECSXP: { const SEXP *px = SEXPPTR(x); for(int j = 0, gsj; j != ng; ++j) { SEXP *pgj = SEXPPTR(pres[j]); gsj = pgs[j]; for(int i = 0; i != gsj; ++i) pgj[i] = px[count++]; } break; } case RAWSXP: { const Rbyte *px = RAW(x); for(int j = 0, gsj; j != ng; ++j) { Rbyte *pgj = RAW(pres[j]); gsj = pgs[j]; for(int i = 0; i != gsj; ++i) pgj[i] = px[count++]; } break; } default: error("Unsupported type '%s' passed to gsplit", type2char(tx)); } } } else if(length(order) == l) { // Grouping not sorted but we have the ordering.. SEXP sym_starts = PROTECT(install("starts")); const SEXP starts = getAttrib(order, sym_starts); UNPROTECT(1); if(length(starts) != ng) goto unsno; const int *po = INTEGER(order), *ps = INTEGER(starts); if(asLogical(toint)) { for(int i = 0; i != ng; ++i) { int *pri = INTEGER(pres[i]); for(int j = ps[i]-1, end = ps[i]+pgs[i]-1, k = 0; j < end; j++) pri[k++] = po[j]; } } else { if(length(x) != l) error("length(x) must match length(g)"); switch(tx) { case INTSXP: case LGLSXP: { const int *px = INTEGER(x); for(int i = 0; i != ng; ++i) { int *pri = INTEGER(pres[i]); for(int j = ps[i]-1, end = ps[i]+pgs[i]-1, k = 0; j < end; ++j) pri[k++] = px[po[j]-1]; } break; } case REALSXP: { double *px = REAL(x); for(int i = 0; i != ng; ++i) { double *pri = REAL(pres[i]); for(int j = ps[i]-1, end = ps[i]+pgs[i]-1, k = 0; j < end; ++j) pri[k++] = px[po[j]-1]; } break; } case CPLXSXP: { Rcomplex *px = COMPLEX(x); for(int i = 0; i != ng; ++i) { Rcomplex *pri = COMPLEX(pres[i]); for(int j = ps[i]-1, end = ps[i]+pgs[i]-1, k = 0; j < end; ++j) pri[k++] = px[po[j]-1]; } break; } case STRSXP: { SEXP *px = STRING_PTR(x); for(int i = 0; i != ng; ++i) { SEXP *pri = STRING_PTR(pres[i]); for(int j = ps[i]-1, end = ps[i]+pgs[i]-1, k = 0; j < end; ++j) pri[k++] = px[po[j]-1]; } break; } case VECSXP: { SEXP *px = SEXPPTR(x); for(int i = 0; i != ng; ++i) { SEXP *pri = SEXPPTR(pres[i]); for(int j = ps[i]-1, end = ps[i]+pgs[i]-1, k = 0; j < end; ++j) pri[k++] = px[po[j]-1]; } break; } case RAWSXP: { const Rbyte *px = RAW(x); for(int i = 0; i != ng; ++i) { Rbyte *pri = RAW(pres[i]); for(int j = ps[i]-1, end = ps[i]+pgs[i]-1, k = 0; j < end; ++j) pri[k++] = px[po[j]-1]; } break; } default: error("Unsupported type '%s' passed to gsplit", type2char(tx)); } } } else { // Unsorted, without ordering unsno:; int *count = (int*)Calloc(ng, int); // memset(count, 0, sizeof(int)*(ng+1)); // Needed here ?? // int *count = (int *) R_alloc(ng+1, sizeof(int)); const int *pg = INTEGER(g); // --pres; if(asLogical(toint)) { for(int i = 0, gi; i != l; ++i) { gi = pg[i]-1; INTEGER(pres[gi])[count[gi]++] = i+1; } } else { if(length(x) != l) error("length(x) must match length(g)"); switch(tx) { case INTSXP: case LGLSXP: { const int *px = INTEGER(x); for(int i = 0, gi; i != l; ++i) { gi = pg[i]-1; INTEGER(pres[gi])[count[gi]++] = px[i]; } break; } case REALSXP: { const double *px = REAL(x); for(int i = 0, gi; i != l; ++i) { gi = pg[i]-1; REAL(pres[gi])[count[gi]++] = px[i]; } break; } case CPLXSXP: { const Rcomplex *px = COMPLEX(x); for(int i = 0, gi; i != l; ++i) { gi = pg[i]-1; COMPLEX(pres[gi])[count[gi]++] = px[i]; } break; } case STRSXP: { const SEXP *px = STRING_PTR(x); for(int i = 0, gi; i != l; ++i) { gi = pg[i]-1; STRING_PTR(pres[gi])[count[gi]++] = px[i]; } break; } case VECSXP: { const SEXP *px = SEXPPTR(x); for(int i = 0, gi; i != l; ++i) { gi = pg[i]-1; SEXPPTR(pres[gi])[count[gi]++] = px[i]; } break; } case RAWSXP: { const Rbyte *px = RAW(x); for(int i = 0, gi; i != l; ++i) { gi = pg[i]-1; RAW(pres[gi])[count[gi]++] = px[i]; } break; } default: error("Unsupported type '%s' passed to gsplit", type2char(tx)); } } Free(count); } UNPROTECT(1); return res; } // This is for fmutate, to reorder the result of grouped data if the result has the same length as x SEXP greorder(SEXP x, SEXP gobj) { if(TYPEOF(gobj) != VECSXP || !inherits(gobj, "GRP")) error("g needs to be an object of class 'GRP', see ?GRP"); if(LOGICAL(VECTOR_ELT(gobj, 5))[1] == 1) return x; const SEXP g = VECTOR_ELT(gobj, 1), gs = VECTOR_ELT(gobj, 2), order = VECTOR_ELT(gobj, 6); const int ng = length(gs), l = length(g), tx = TYPEOF(x), *pgs = INTEGER(gs), *pg = INTEGER(g); if(ng != INTEGER(VECTOR_ELT(gobj, 0))[0]) error("'GRP' object needs to have valid vector of group-sizes"); if(l != length(x)) error("length(x) must match length(g)"); SEXP res = PROTECT(allocVector(tx, l)); // Note: This is only faster for a large number of groups... if(length(order) == l) { // Grouping not sorted but we have the ordering.. SEXP sym_starts = PROTECT(install("starts")); const SEXP starts = getAttrib(order, sym_starts); UNPROTECT(1); if(length(starts) != ng) goto unsno2; const int *po = INTEGER(order), *ps = INTEGER(starts); switch(tx) { case INTSXP: case LGLSXP: { int *px = INTEGER(x), *pr = INTEGER(res); for(int i = 0, k = 0; i != ng; ++i) { for(int j = ps[i]-1, end = ps[i]+pgs[i]-1; j < end; ++j) pr[po[j]-1] = px[k++]; } break; } case REALSXP: { double *px = REAL(x), *pr = REAL(res); for(int i = 0, k = 0; i != ng; ++i) { for(int j = ps[i]-1, end = ps[i]+pgs[i]-1; j < end; ++j) pr[po[j]-1] = px[k++]; } break; } case CPLXSXP: { Rcomplex *px = COMPLEX(x), *pr = COMPLEX(res); for(int i = 0, k = 0; i != ng; ++i) { for(int j = ps[i]-1, end = ps[i]+pgs[i]-1; j < end; ++j) pr[po[j]-1] = px[k++]; } break; } case STRSXP: { SEXP *px = STRING_PTR(x), *pr = STRING_PTR(res); for(int i = 0, k = 0; i != ng; ++i) { for(int j = ps[i]-1, end = ps[i]+pgs[i]-1; j < end; ++j) pr[po[j]-1] = px[k++]; } break; } case VECSXP: { SEXP *px = SEXPPTR(x), *pr = SEXPPTR(res); for(int i = 0, k = 0; i != ng; ++i) { for(int j = ps[i]-1, end = ps[i]+pgs[i]-1; j < end; ++j) pr[po[j]-1] = px[k++]; } break; } case RAWSXP: { Rbyte *px = RAW(x), *pr = RAW(res); for(int i = 0, k = 0; i != ng; ++i) { for(int j = ps[i]-1, end = ps[i]+pgs[i]-1; j < end; ++j) pr[po[j]-1] = px[k++]; } break; } default: error("Unsupported type '%s' passed to gsplit", type2char(tx)); } } else { // Unsorted, without ordering unsno2:; int *count = (int *) R_alloc(ng+1, sizeof(int)); int *cgs = (int *) R_alloc(ng+2, sizeof(int)); cgs[1] = 0; for(int i = 0; i != ng; ++i) { count[i+1] = 0; cgs[i+2] = cgs[i+1] + pgs[i]; } switch(tx) { case INTSXP: case LGLSXP: { int *px = INTEGER(x), *pr = INTEGER(res); for(int i = 0; i != l; ++i) pr[i] = px[cgs[pg[i]]+count[pg[i]]++]; break; } case REALSXP: { double *px = REAL(x), *pr = REAL(res); for(int i = 0; i != l; ++i) pr[i] = px[cgs[pg[i]]+count[pg[i]]++]; break; } case CPLXSXP: { Rcomplex *px = COMPLEX(x), *pr = COMPLEX(res); for(int i = 0; i != l; ++i) pr[i] = px[cgs[pg[i]]+count[pg[i]]++]; break; } case STRSXP: { SEXP *px = STRING_PTR(x), *pr = STRING_PTR(res); for(int i = 0; i != l; ++i) pr[i] = px[cgs[pg[i]]+count[pg[i]]++]; break; } case VECSXP: { SEXP *px = SEXPPTR(x), *pr = SEXPPTR(res); for(int i = 0; i != l; ++i) pr[i] = px[cgs[pg[i]]+count[pg[i]]++]; break; } case RAWSXP: { Rbyte *px = RAW(x), *pr = RAW(res); for(int i = 0; i != l; ++i) pr[i] = px[cgs[pg[i]]+count[pg[i]]++]; break; } default: error("Unsupported type '%s' passed to gsplit", type2char(tx)); } } SHALLOW_DUPLICATE_ATTRIB(res, x); UNPROTECT(1); return res; } // Note: Only supports numeric data!!!! SEXP lassign(SEXP x, SEXP s, SEXP rows, SEXP fill) { int l = length(x), tr = TYPEOF(rows), ss = asInteger(s), rs = LENGTH(rows); SEXP out = PROTECT(allocVector(VECSXP, l)); // SEXP *px = VECTOR_PTR(x); // -> Depreciated interface: https://github.com/hadley/r-internals/blob/ea892fa79bbffe961e78dbe9c90ce4ca3bf2d9bc/vectors.md SEXP *px = SEXPPTR(x); double dfill = asReal(fill); if(tr == INTSXP) { int *rowsv = INTEGER(rows); //, vs = ss * sizeof(double); for(int j = l; j--; ) { SEXP column = px[j]; // VECTOR_ELT(x, j); if(length(column) != rs) error("length(rows) must match nrow(x)"); SEXP outj; SET_VECTOR_ELT(out, j, outj = allocVector(REALSXP, ss)); double *pcolumn = REAL(column), *poutj = REAL(outj); // memset(poutj, dfill, vs); // cannot memset missing values... can only memset 0 for(int i = ss; i--; ) poutj[i] = dfill; for(int i = 0; i != rs; ++i) poutj[rowsv[i]-1] = pcolumn[i]; SHALLOW_DUPLICATE_ATTRIB(outj, column); } } else if(tr == LGLSXP) { int *rowsv = LOGICAL(rows); if(ss != rs) error("length(rows) must match length(s) if rows is a logical vector"); for(int j = l; j--; ) { SEXP column = px[j]; // VECTOR_ELT(x, j); SEXP outj; SET_VECTOR_ELT(out, j, outj = allocVector(REALSXP, ss)); double *pcolumn = REAL(column), *poutj = REAL(outj); for(int i = 0, k = 0; i != rs; ++i) poutj[i] = rowsv[i] ? pcolumn[k++] : dfill; SHALLOW_DUPLICATE_ATTRIB(outj, column); } } else error("rows must be positive integers or a logical vector"); SHALLOW_DUPLICATE_ATTRIB(out, x); UNPROTECT(1); return out; } SEXP Cna_rm(SEXP x) { const int n = LENGTH(x); if (n < 1) return x; int k = 0; switch(TYPEOF(x)) { case LGLSXP: case INTSXP: { const int *xd = INTEGER(x); for (int i = 0; i != n; ++i) if(xd[i] == NA_INTEGER) ++k; if(k == 0) return x; SEXP out = PROTECT(allocVector(TYPEOF(x), n - k)); int *pout = INTEGER(out); k = 0; for (int i = 0; i != n; ++i) if(xd[i] != NA_INTEGER) pout[k++] = xd[i]; copyMostAttrib(x, out); UNPROTECT(1); return out; } case REALSXP: { // What about integer64?? const double *xd = REAL(x); for (int i = 0; i != n; ++i) if(ISNAN(xd[i])) ++k; if(k == 0) return x; SEXP out = PROTECT(allocVector(REALSXP, n - k)); double *pout = REAL(out); k = 0; for (int i = 0; i != n; ++i) if(NISNAN(xd[i])) pout[k++] = xd[i]; // using xd[i] == xd[i] is not faster !! copyMostAttrib(x, out); UNPROTECT(1); return out; } case STRSXP: { const SEXP *xd = STRING_PTR(x); for (int i = 0; i != n; ++i) if(xd[i] == NA_STRING) ++k; if(k == 0) return x; SEXP out = PROTECT(allocVector(STRSXP, n - k)); SEXP *pout = STRING_PTR(out); k = 0; for (int i = 0; i != n; ++i) if(xd[i] != NA_STRING) pout[k++] = xd[i]; copyMostAttrib(x, out); UNPROTECT(1); return out; } case VECSXP: { const SEXP *xd = SEXPPTR(x); for (int i = 0; i != n; ++i) if(length(xd[i]) == 0) ++k; if(k == 0) return x; SEXP out = PROTECT(allocVector(VECSXP, n - k)); SEXP *pout = SEXPPTR(out); k = 0; for (int i = 0; i != n; ++i) if(length(xd[i]) != 0) pout[k++] = xd[i]; copyMostAttrib(x, out); UNPROTECT(1); return out; } } error("Unsupported type '%s' passed to na_rm()", type2char(TYPEOF(x))); } // Helper function to find a single sting in factor levels int fchmatch(SEXP x, SEXP val, int nomatch) { const SEXP *px = STRING_PTR(x), v = asChar(val); for(int i = 0, l = length(x); i != l; ++i) if(px[i] == v) return i + 1; return nomatch; } SEXP whichv(SEXP x, SEXP val, SEXP Rinvert) { int j = 0, n = length(x), invert = asLogical(Rinvert); if(length(val) != 1) error("value needs to be length 1"); int *buf = (int *) R_alloc(n, sizeof(int)); SEXP ans; #define WHICHVLOOP \ if(invert) { \ for(int i = 0; i != n; ++i) if(px[i] != v) buf[j++] = i+1; \ } else { \ for(int i = 0; i != n; ++i) if(px[i] == v) buf[j++] = i+1; \ } switch(TYPEOF(x)) { case INTSXP: case LGLSXP: { const int *px = INTEGER(x); int v; if(TYPEOF(val) == STRSXP) { if(!isFactor(x)) error("Type mismatch: if value is character, x must be character or factor."); v = fchmatch(getAttrib(x, R_LevelsSymbol), val, 0); } else v = asInteger(val); WHICHVLOOP break; } case REALSXP: { const double *px = REAL(x); const double v = asReal(val); if(ISNAN(v)) { if(invert) { for(int i = 0; i != n; ++i) if(NISNAN(px[i])) buf[j++] = i+1; } else { for(int i = 0; i != n; ++i) if(ISNAN(px[i])) buf[j++] = i+1; } } else { WHICHVLOOP } break; } case STRSXP: { const SEXP *px = STRING_PTR(x); const SEXP v = asChar(val); WHICHVLOOP break; } case RAWSXP : { const Rbyte *px = RAW(x); const Rbyte v = RAW(val)[0]; WHICHVLOOP break; } default: error("Unsupported type '%s' passed to whichv()", type2char(TYPEOF(x))); } PROTECT(ans = allocVector(INTSXP, j)); if(j) memcpy(INTEGER(ans), buf, sizeof(int) * j); UNPROTECT(1); return(ans); } SEXP anyallv(SEXP x, SEXP val, SEXP Rall) { int n = length(x), all = asLogical(Rall); if(length(val) != 1) error("value needs to be length 1"); #define ALLANYVLOOP \ if(all) { \ for(int i = 0; i != n; ++i) if(px[i] != v) return ScalarLogical(0); \ return ScalarLogical(1); \ } else { \ for(int i = 0; i != n; ++i) if(px[i] == v) return ScalarLogical(1); \ return ScalarLogical(0); \ } switch(TYPEOF(x)) { case INTSXP: case LGLSXP: { const int *px = INTEGER(x); int v; if(TYPEOF(val) == STRSXP) { if(!isFactor(x)) error("Type mismatch: if value is character, x must be character or factor."); v = fchmatch(getAttrib(x, R_LevelsSymbol), val, 0); } else v = asInteger(val); ALLANYVLOOP break; } case REALSXP: { const double *px = REAL(x); const double v = asReal(val); if(ISNAN(v)) error("please use allNA()"); ALLANYVLOOP break; } case STRSXP: { const SEXP *px = STRING_PTR(x); const SEXP v = asChar(val); ALLANYVLOOP break; } case RAWSXP : { const Rbyte *px = RAW(x); const Rbyte v = RAW(val)[0]; ALLANYVLOOP break; } default: error("Unsupported type '%s' passed to allv() / anyv()", type2char(TYPEOF(x))); } return(R_NilValue); } SEXP setcopyv(SEXP x, SEXP val, SEXP rep, SEXP Rinvert, SEXP Rset, SEXP Rind1) { int n = length(x), lv = length(val), lr = length(rep), ind1 = asLogical(Rind1), invert = asLogical(Rinvert), set = asLogical(Rset); if(lv > 1 || ind1) { if(lr != n) error("If length(v) > 1, length(r) must match length(x). Note that x[v] <- r is efficient in base R, only x[v] <- r[v] is optimized here."); if(TYPEOF(val) == LGLSXP) { if(lv != n) error("If v is a logical vector, length(v) needs to be equal to length(x). Note that x[v] <- r is efficient in base R, only x[v] <- r[v] is optimized here."); } else if(TYPEOF(val) == INTSXP) { if(invert) error("invert = TRUE is only possible if v is a logical vector"); } else error("If length(v) > 1, v must be an integer or logical vector used to subset both x and r"); } else if(lr != 1 && lr != n) error("If length(v) == 1, length(r) must be 1 or length(x)"); SEXP ans = R_NilValue; if(set == 0) PROTECT(ans = duplicate(x)); // Fastest?? // copies attributes ?? -> Yes #define setcopyvLOOP(e) \ if(invert) { \ for(int i = 0; i != n; ++i) if(px[i] != v) px[i] = e; \ } else { \ for(int i = 0; i != n; ++i) if(px[i] == v) px[i] = e; \ } #define setcopyvLOOPLVEC \ if(tv == INTSXP) { \ for(int i = 0; i != lv; ++i) px[pv[i]-1] = pr[pv[i]-1]; \ } else if(invert == 0) { \ for(int i = 0; i != n; ++i) if(pv[i]) px[i] = pr[i]; \ } else { \ for(int i = 0; i != n; ++i) if(pv[i] == 0) px[i] = pr[i]; \ } switch(TYPEOF(x)) { case INTSXP: case LGLSXP: { int *px = set ? INTEGER(x) : INTEGER(ans); if(lv == 1 && ind1 == 0) { int v; if(TYPEOF(val) == STRSXP) { if(!isFactor(x)) error("Type mismatch: if value is character, x must be character or factor."); v = fchmatch(getAttrib(x, R_LevelsSymbol), val, 0); } else v = asInteger(val); if(lr == 1) { const int r = asInteger(rep); setcopyvLOOP(r) } else { const int *pr = INTEGER(rep); setcopyvLOOP(pr[i]) } } else { const int tv = TYPEOF(val), *pv = tv == INTSXP ? INTEGER(val) : LOGICAL(val), *pr = INTEGER(rep); setcopyvLOOPLVEC } break; } case REALSXP: { double *px = set ? REAL(x) : REAL(ans); if(lv == 1 && ind1 == 0) { const double v = asReal(val); if(lr == 1) { const double r = asReal(rep); if(ISNAN(v)) { if(invert) { for(int i = 0; i != n; ++i) if(NISNAN(px[i])) px[i] = r; } else { for(int i = 0; i != n; ++i) if(ISNAN(px[i])) px[i] = r; } } else { setcopyvLOOP(r) } } else { const double *pr = REAL(rep); if(ISNAN(v)) { if(invert) { for(int i = 0; i != n; ++i) if(NISNAN(px[i])) px[i] = pr[i]; } else { for(int i = 0; i != n; ++i) if(ISNAN(px[i])) px[i] = pr[i]; } } else { setcopyvLOOP(pr[i]) } } } else { const int tv = TYPEOF(val), *pv = tv == INTSXP ? INTEGER(val) : LOGICAL(val); const double *pr = REAL(rep); setcopyvLOOPLVEC } break; } case STRSXP: { SEXP *px = set ? STRING_PTR(x) : STRING_PTR(ans); if(lv == 1 && ind1 == 0) { const SEXP v = asChar(val); if(lr == 1) { const SEXP r = asChar(rep); setcopyvLOOP(r) } else { const SEXP *pr = STRING_PTR(rep); setcopyvLOOP(pr[i]) } } else { const int tv = TYPEOF(val), *pv = tv == INTSXP ? INTEGER(val) : LOGICAL(val); const SEXP *pr = STRING_PTR(rep); setcopyvLOOPLVEC } break; } case VECSXP: { SEXP *px = set ? SEXPPTR(x) : SEXPPTR(ans); if(lv == 1 && ind1 == 0) error("Cannot compare lists to a value"); const int tv = TYPEOF(val), *pv = tv == INTSXP ? INTEGER(val) : LOGICAL(val); const SEXP *pr = SEXPPTR(rep); setcopyvLOOPLVEC break; } // case RAWSXP: // { // Rbyte *px = set ? RAW(x) : RAW(ans); // const Rbyte v = RAW(val)[0], r = RAW(rep)[0]; // setcopyvLOOP // break; // } default: error("Unsupported type '%s' passed to setv() / copyv()", type2char(TYPEOF(x))); } if(set == 0) { UNPROTECT(1); return(ans); } return(x); } SEXP setop_core(SEXP x, SEXP val, SEXP op, SEXP roww) { int n = length(x), nv = length(val), o = asInteger(op), tx = TYPEOF(x); #define OPSWITCH(e) \ switch(o) { \ case 1: for(int i = 0; i != n; ++i) px[i] += e; \ break; \ case 2: for(int i = 0; i != n; ++i) px[i] -= e; \ break; \ case 3: for(int i = 0; i != n; ++i) px[i] *= e; \ break; \ case 4: for(int i = 0; i != n; ++i) px[i] /= e; \ break; \ default: error("unsupported operation"); \ } if(nv == 1 || nv == n) { switch(tx) { case INTSXP: case LGLSXP: { int *px = INTEGER(x); if(nv == 1) { const int v = asInteger(val); OPSWITCH(v) } else { if(TYPEOF(val) == REALSXP) { // warning("adding real values to an integer: will truncate decimals"); const double *v = REAL(val); OPSWITCH(v[i]) } else { const int *v = INTEGER(val); OPSWITCH(v[i]) } } break; } case REALSXP: { double *px = REAL(x); if(nv == 1) { const double v = asReal(val); OPSWITCH(v) } else { if(TYPEOF(val) == REALSXP) { const double *v = REAL(val); OPSWITCH(v[i]) } else { const int *v = INTEGER(val); OPSWITCH(v[i]) } } break; } default: error("Unsupported type '%s'", type2char(tx)); } } else { if(!isMatrix(x)) error("unequal argument lengths"); int nr = nrows(x), nc = n / nr, rwl = asLogical(roww); if((rwl == 0 && nr != nv) || (rwl && nc != nv)) error("length of vector must match matrix rows/columns or the size of the matrix itself"); #define OPSWITCHMAT(e) \ switch(o) { \ case 1: for(int j = 0, cj; j != nc; ++j) { \ cj = j * nr; \ for(int i = 0; i != nr; ++i) px[cj + i] += e; \ } \ break; \ case 2: for(int j = 0, cj; j != nc; ++j) { \ cj = j * nr; \ for(int i = 0; i != nr; ++i) px[cj + i] -= e; \ } \ break; \ case 3: for(int j = 0, cj; j != nc; ++j) { \ cj = j * nr; \ for(int i = 0; i != nr; ++i) px[cj + i] *= e; \ } \ break; \ case 4: for(int j = 0, cj; j != nc; ++j) { \ cj = j * nr; \ for(int i = 0; i != nr; ++i) px[cj + i] /= e; \ } \ break; \ default: error("unsupported operation"); \ } switch(tx) { case INTSXP: case LGLSXP: { int *px = INTEGER(x); if(TYPEOF(val) == REALSXP) { // warning("adding real values to an integer: will truncate decimals"); const double *v = REAL(val); if(rwl) { OPSWITCHMAT(v[j]) } else { OPSWITCHMAT(v[i]) } } else { const int *v = INTEGER(val); if(rwl) { OPSWITCHMAT(v[j]) } else { OPSWITCHMAT(v[i]) } } break; } case REALSXP: { double *px = REAL(x); if(TYPEOF(val) == REALSXP) { const double *v = REAL(val); if(rwl) { OPSWITCHMAT(v[j]) } else { OPSWITCHMAT(v[i]) } } else { const int *v = INTEGER(val); if(rwl) { OPSWITCHMAT(v[j]) } else { OPSWITCHMAT(v[i]) } } break; } default: error("Unsupported type '%s'", type2char(tx)); } } return(x); } SEXP setop(SEXP x, SEXP val, SEXP op, SEXP roww) { // IF x is a list, call function repeatedly.. if(TYPEOF(x) == VECSXP) { SEXP *px = SEXPPTR(x); int lx = length(x); if(TYPEOF(val) == VECSXP) { // val is list: must match length(x) SEXP *pv = SEXPPTR(val); if(lx != length(val)) error("length(X) must match length(V)"); for(int i = 0; i != lx; ++i) setop_core(px[i], pv[i], op, roww); } else if (length(val) == 1 || asLogical(roww) == 0) { // val is a scalar or vector but rowwise = FALSE for(int i = 0; i != lx; ++i) setop_core(px[i], val, op, roww); } else { // val is a numeric or logical vector to be applied rowwise if(lx != length(val)) error("length(X) must match length(V)"); switch(TYPEOF(val)) { case REALSXP: { double *pv = REAL(val); for(int i = 0; i != lx; ++i) setop_core(px[i], ScalarReal(pv[i]), op, roww); break; } case INTSXP: case LGLSXP: { int *pv = INTEGER(val); for(int i = 0; i != lx; ++i) setop_core(px[i], ScalarInteger(pv[i]), op, roww); break; } default: error("Unsupported type '%s'", type2char(TYPEOF(val))); } } return x; } return setop_core(x, val, op, roww); } SEXP vtypes(SEXP x, SEXP isnum) { int tx = TYPEOF(x); if(tx != VECSXP) return ScalarInteger(tx); int n = length(x); SEXP ans = PROTECT(allocVector(INTSXP, n)); int *pans = INTEGER(ans); switch(asInteger(isnum)) { case 0: for(int i = 0; i != n; ++i) pans[i] = TYPEOF(VECTOR_ELT(x, i)) + 1; break; case 1: // Numeric variables: do_is with op = 100: https://github.com/wch/r-source/blob/2b0818a47199a0b64b6aa9b9f0e53a1e886e8e95/src/main/coerce.c for(int i = 0; i != n; ++i) { SEXP ci = VECTOR_ELT(x, i); int tci = TYPEOF(ci); pans[i] = (tci == INTSXP || tci == REALSXP) && OBJECT(ci) == 0; } SET_TYPEOF(ans, LGLSXP); break; case 2: for(int i = 0; i != n; ++i) pans[i] = (int)isFactor(VECTOR_ELT(x, i)); SET_TYPEOF(ans, LGLSXP); break; default: error("Unsupported vtypes option"); } UNPROTECT(1); return ans; } SEXP vlengths(SEXP x, SEXP usenam) { if(TYPEOF(x) != VECSXP) return ScalarInteger(length(x)); int n = length(x); SEXP ans = PROTECT(allocVector(INTSXP, n)); int *pans = INTEGER(ans); if(ALTREP(x)) { for(int i = 0; i != n; ++i) pans[i] = length(VECTOR_ELT(x, i)); } else { SEXP *px = SEXPPTR(x); for(int i = 0; i != n; ++i) pans[i] = length(px[i]); } if(asLogical(usenam)) { SEXP nam = getAttrib(x, R_NamesSymbol); if(TYPEOF(nam) != NILSXP) namesgets(ans, nam); } UNPROTECT(1); return ans; } // SEXP CasChar(SEXP x) { // return coerceVector(x, STRSXP); // } /* Inspired by: * do_list2env : .Internal(list2env(x, envir)) */ SEXP multiassign(SEXP lhs, SEXP rhs, SEXP envir) { if(TYPEOF(lhs) != STRSXP) error("lhs needs to be character"); int n = length(lhs); if(n == 1) { // lazy_duplicate appears not necessary (copy-on modify is automatically implemented, and <- also does not use it). defineVar(installChar(STRING_ELT(lhs, 0)), rhs, envir); return R_NilValue; } if(length(rhs) != n) error("length(lhs) must be equal to length(rhs)"); SEXP *plhs = STRING_PTR(lhs); switch(TYPEOF(rhs)) { // installTrChar translates to native encoding, installChar does the same now, but also is available on older systems. case REALSXP: { double *prhs = REAL(rhs); for(int i = 0; i < n; ++i) defineVar(installChar(plhs[i]), ScalarReal(prhs[i]), envir); break; } case INTSXP: { int *prhs = INTEGER(rhs); for(int i = 0; i < n; ++i) defineVar(installChar(plhs[i]), ScalarInteger(prhs[i]), envir); break; } case STRSXP: { SEXP *prhs = STRING_PTR(rhs); for(int i = 0; i < n; ++i) defineVar(installChar(plhs[i]), ScalarString(prhs[i]), envir); break; } case LGLSXP: { int *prhs = LOGICAL(rhs); for(int i = 0; i < n; ++i) defineVar(installChar(plhs[i]), ScalarLogical(prhs[i]), envir); break; } case VECSXP: { // lazy_duplicate appears not necessary (copy-on modify is automatically implemented, and <- also does not use it). for(int i = 0; i < n; ++i) defineVar(installChar(plhs[i]), VECTOR_ELT(rhs, i), envir); break; } default: { SEXP rhsl = PROTECT(coerceVector(rhs, VECSXP)); for(int i = 0; i < n; ++i) defineVar(installChar(plhs[i]), VECTOR_ELT(rhsl, i), envir); UNPROTECT(1); } } return R_NilValue; } SEXP vlabels(SEXP x, SEXP attrn, SEXP usenam) { if(!isString(attrn)) error("'attrn' must be of mode character"); if(length(attrn) != 1) error("exactly one attribute 'attrn' must be given"); SEXP sym_attrn = PROTECT(installChar(STRING_ELT(attrn, 0))); int l = length(x); if(TYPEOF(x) != VECSXP) { SEXP labx = getAttrib(x, sym_attrn); UNPROTECT(1); if(labx == R_NilValue) return ScalarString(NA_STRING); return labx; } SEXP res = PROTECT(allocVector(STRSXP, l)); SEXP *pres = STRING_PTR(res), *px = SEXPPTR(x); for(int i = 0; i < l; ++i) { SEXP labxi = getAttrib(px[i], sym_attrn); pres[i] = labxi == R_NilValue ? NA_STRING : STRING_ELT(labxi, 0); } if(asLogical(usenam)) { SEXP nam = getAttrib(x, R_NamesSymbol); if(TYPEOF(nam) != NILSXP) namesgets(res, nam); } UNPROTECT(2); return res; } // Note: ind can be NULL... SEXP setvlabels(SEXP x, SEXP attrn, SEXP value, SEXP ind) { // , SEXP sc if(!isString(attrn)) error("'attrn' must be of mode character"); if(length(attrn) != 1) error("exactly one attribute 'attrn' must be given"); if(TYPEOF(x) != VECSXP) error("X must be a list"); int nprotect = 1, l = length(x), tv = TYPEOF(value); // , scl = asLogical(sc); SEXP *px = SEXPPTR(x); // , xsc; // if(scl) { // Create shallow copy // if(INHERITS(x, char_datatable)) { // xsc = PROTECT(Calloccol(x)); // } else { // xsc = PROTECT(shallow_duplicate(x)); // } // ++nprotect; // px = SEXPPTR(xsc); // } SEXP *pv = px; if(tv != NILSXP) { if(tv == VECSXP || tv == STRSXP) { pv = SEXPPTR(value); } else { SEXP vl = PROTECT(coerceVector(value, VECSXP)); pv = SEXPPTR(vl); ++nprotect; } } SEXP sym_attrn = PROTECT(installChar(STRING_ELT(attrn, 0))); if(length(ind) == 0) { if(tv != NILSXP && l != length(value)) error("length(x) must match length(value)"); if(tv == NILSXP) { for(int i = 0; i < l; ++i) setAttrib(px[i], sym_attrn, R_NilValue); } else if(tv == STRSXP) { for(int i = 0; i < l; ++i) setAttrib(px[i], sym_attrn, ScalarString(pv[i])); } else { for(int i = 0; i < l; ++i) setAttrib(px[i], sym_attrn, pv[i]); } } else { if(TYPEOF(ind) != INTSXP) error("vlabels<-: ind must be of type integer"); int li = length(ind), *pind = INTEGER(ind), ii; if(tv != NILSXP && li != length(value)) error("length(ind) must match length(value)"); if(li == 0 || li > l) error("vlabels<-: length(ind) must be > 0 and <= length(x)"); if(tv == NILSXP) { for(int i = 0; i < li; ++i) { ii = pind[i]-1; if(ii < 0 || ii >= l) error("vlabels<-: ind must be between 1 and length(x)"); setAttrib(px[ii], sym_attrn, R_NilValue); } } else if(tv == STRSXP) { for(int i = 0; i < li; ++i) { ii = pind[i]-1; if(ii < 0 || ii >= l) error("vlabels<-: ind must be between 1 and length(x)"); setAttrib(px[ii], sym_attrn, ScalarString(pv[i])); } } else { for(int i = 0; i < li; ++i) { ii = pind[i]-1; if(ii < 0 || ii >= l) error("vlabels<-: ind must be between 1 and length(x)"); setAttrib(px[ii], sym_attrn, pv[i]); } } } UNPROTECT(nprotect); // return scl ? xsc : x; return x; } SEXP setnames(SEXP x, SEXP nam) { setAttrib(x, R_NamesSymbol, nam); return x; } collapse/src/fvar_fsd.cpp0000644000176200001440000017335214174223734015163 0ustar liggesusers#include using namespace Rcpp; // Note: More comments are in fvar.cpp (C++ folder, not on Github) // [[Rcpp::export]] NumericVector fvarsdCpp(const NumericVector& x, int ng = 0, const IntegerVector& g = 0, const SEXP& gs = R_NilValue, const SEXP& w = R_NilValue, bool narm = true, bool stable_algo = true, bool sd = true) { int l = x.size(); if(l < 2) return Rf_ScalarReal(NA_REAL); // Prevents seqfault for numeric(0) #101 if(stable_algo) { // WELFORDS ONLINE METHOD --------------------------------------------------------- if(Rf_isNull(w)) { // No weights if(ng == 0) { if(narm) { int j = l-1; // double n = 0; // long double mean = 0, d1 = 0, M2 = 0; // LD really necessary ? what about speed ? double n = 0, mean = 0, d1 = 0, M2 = 0; while(std::isnan(x[j]) && j!=0) --j; if(j != 0) { for(int i = j+1; i--; ) { if(std::isnan(x[i])) continue; d1 = x[i]-mean; mean += d1 * (1 / ++n); M2 += d1*(x[i]-mean); } M2 = M2/(n-1); if(sd) M2 = sqrt(M2); if(std::isnan(M2)) M2 = NA_REAL; return Rf_ScalarReal(M2); // (double)M2 } else return Rf_ScalarReal(NA_REAL); } else { // double n = 0; // long double mean = 0, d1 = 0, M2 = 0; double n = 0, mean = 0, d1 = 0, M2 = 0; for(int i = 0; i != l; ++i) { if(std::isnan(x[i])) { return Rf_ScalarReal(NA_REAL); } else { d1 = x[i]-mean; mean += d1*(1 / ++n); M2 += d1*(x[i]-mean); } } M2 = M2/(l-1); if(sd) M2 = sqrt(M2); if(std::isnan(M2)) M2 = NA_REAL; return Rf_ScalarReal(M2); // (double)M2 } } else { // with groups if(g.size() != l) stop("length(g) must match nrow(X)"); // long double d1 = 0; double d1 = 0; if(narm) { NumericVector M2(ng, NA_REAL), mean(ng), n(ng, 1.0); // NumericVector mean = no_init_vector(ng); // better for valgrind for(int i = l; i--; ) { if(std::isnan(x[i])) continue; if(std::isnan(M2[g[i]-1])) { mean[g[i]-1] = x[i]; M2[g[i]-1] = 0; } else { d1 = x[i]-mean[g[i]-1]; mean[g[i]-1] += d1 * (1 / ++n[g[i]-1]); M2[g[i]-1] += d1*(x[i]-mean[g[i]-1]); } } if(sd) { for(int i = ng; i--; ) { if(std::isnan(M2[i])) M2[i] = NA_REAL; else { M2[i] = sqrt(M2[i]/(n[i]-1)); if(std::isnan(M2[i])) M2[i] = NA_REAL; } } } else { for(int i = ng; i--; ) { if(std::isnan(M2[i])) M2[i] = NA_REAL; else { M2[i] /= n[i]-1; if(std::isnan(M2[i])) M2[i] = NA_REAL; } } } if(!Rf_isObject(x)) Rf_copyMostAttrib(x, M2); return M2; } else { NumericVector M2(ng), mean(ng), n(ng); int ngs = 0; for(int i = 0; i != l; ++i) { if(std::isnan(M2[g[i]-1])) continue; if(std::isnan(x[i])) { M2[g[i]-1] = NA_REAL; ++ngs; if(ngs == ng) break; } else { d1 = x[i]-mean[g[i]-1]; mean[g[i]-1] += d1 * (1 / ++n[g[i]-1]); M2[g[i]-1] += d1*(x[i]-mean[g[i]-1]); } } if(sd) { for(int i = ng; i--; ) { if(std::isnan(M2[i])) M2[i] = NA_REAL; else { M2[i] = sqrt(M2[i]/(n[i]-1)); if(std::isnan(M2[i])) M2[i] = NA_REAL; } } } else { for(int i = ng; i--; ) { if(std::isnan(M2[i])) M2[i] = NA_REAL; else { M2[i] /= n[i]-1; if(std::isnan(M2[i])) M2[i] = NA_REAL; } } } if(!Rf_isObject(x)) Rf_copyMostAttrib(x, M2); return M2; } } } else { // With weights NumericVector wg = w; if(l != wg.size()) stop("length(w) must match length(x)"); if(ng == 0) { if(narm) { int j = l-1; // long double sumw = 0, mean = 0, M2 = 0, d1 = 0; double sumw = 0, mean = 0, M2 = 0, d1 = 0; while((std::isnan(x[j]) || std::isnan(wg[j]) || wg[j] == 0) && j!=0) --j; if(j != 0) { for(int i = j+1; i--; ) { if(std::isnan(x[i]) || std::isnan(wg[i]) || wg[i] == 0) continue; // additional check to skip 0 weights has practically zero cost.. sumw += wg[i]; d1 = x[i] - mean; mean += d1 * (wg[i] / sumw); M2 += wg[i] * d1 * (x[i] - mean); } M2 /= sumw-1; if(sd) M2 = sqrt(M2); if(std::isnan(M2)) M2 = NA_REAL; return Rf_ScalarReal(M2); // (double)M2 } else return Rf_ScalarReal(NA_REAL); } else { // long double sumw = 0, mean = 0, M2 = 0, d1 = 0; double sumw = 0, mean = 0, M2 = 0, d1 = 0; for(int i = 0; i != l; ++i) { if(std::isnan(x[i]) || std::isnan(wg[i])) { return NumericVector::create(NA_REAL); } else { if(wg[i] == 0) continue; // This is necessary to prevent 0 starting weights which will render the mean infinite. Has little computational cost. sumw += wg[i]; d1 = x[i] - mean; mean += d1 * (wg[i] / sumw); M2 += wg[i] * d1 * (x[i] - mean); } } M2 /= sumw-1; if(sd) M2 = sqrt(M2); if(std::isnan(M2)) M2 = NA_REAL; return Rf_ScalarReal(M2); // (double)M2 } } else { // with groups if(g.size() != l) stop("length(g) must match nrow(X)"); // long double d1 = 0; double d1 = 0; if(narm) { NumericVector M2(ng, NA_REAL), sumw(ng), mean(ng); // better for valgrind // NumericVector sumw = no_init_vector(ng), mean = no_init_vector(ng); for(int i = l; i--; ) { if(std::isnan(x[i]) || std::isnan(wg[i]) || wg[i] == 0) continue; if(std::isnan(M2[g[i]-1])) { sumw[g[i]-1] = wg[i]; mean[g[i]-1] = x[i]; M2[g[i]-1] = 0; } else { sumw[g[i]-1] += wg[i]; d1 = x[i] - mean[g[i]-1]; mean[g[i]-1] += d1 * (wg[i] / sumw[g[i]-1]); M2[g[i]-1] += wg[i] * d1 * (x[i] - mean[g[i]-1]); } } if(sd) { for(int i = ng; i--; ) { if(std::isnan(M2[i])) M2[i] = NA_REAL; else { M2[i] = sqrt(M2[i]/(sumw[i]-1)); if(std::isnan(M2[i])) M2[i] = NA_REAL; } } } else { for(int i = ng; i--; ) { if(std::isnan(M2[i])) M2[i] = NA_REAL; else { M2[i] /= sumw[i]-1; if(std::isnan(M2[i])) M2[i] = NA_REAL; } } } if(!Rf_isObject(x)) Rf_copyMostAttrib(x, M2); return M2; } else { NumericVector M2(ng), sumw(ng), mean(ng); int ngs = 0; for(int i = 0; i != l; ++i) { if(std::isnan(M2[g[i]-1])) continue; if(std::isnan(x[i]) || std::isnan(wg[i])) { M2[g[i]-1] = NA_REAL; ++ngs; if(ngs == ng) break; } else { if(wg[i] == 0) continue; // This is necessary to prevent 0 starting weights which will render the mean infinite. Has little computational cost. sumw[g[i]-1] += wg[i]; d1 = x[i] - mean[g[i]-1]; mean[g[i]-1] += d1 * (wg[i] / sumw[g[i]-1]); M2[g[i]-1] += wg[i] * d1 * (x[i] - mean[g[i]-1]); } } if(sd) { for(int i = ng; i--; ) { if(std::isnan(M2[i])) M2[i] = NA_REAL; else { M2[i] = sqrt(M2[i]/(sumw[i]-1)); if(std::isnan(M2[i])) M2[i] = NA_REAL; } } } else { for(int i = ng; i--; ) { if(std::isnan(M2[i])) M2[i] = NA_REAL; else { M2[i] /= sumw[i]-1; if(std::isnan(M2[i])) M2[i] = NA_REAL; } } } if(!Rf_isObject(x)) Rf_copyMostAttrib(x, M2); return M2; } } } } else { // ONE-PASS METHOD --------------------------------------------------------- if(Rf_isNull(w)) { // No weights if(ng == 0) { if(narm) { int j = l-1, n = 1; long double sum = x[j], sq_sum; while(std::isnan(sum) && j!=0) sum = x[--j]; sq_sum = sum*sum; if(j != 0) { for(int i = j; i--; ) { if(std::isnan(x[i])) continue; sum += x[i]; sq_sum += pow(x[i],2); ++n; } sq_sum = (sq_sum - pow(sum/n,2)*n)/(n-1); if(sd) sq_sum = sqrt(sq_sum); if(std::isnan(sq_sum)) sq_sum = NA_REAL; return Rf_ScalarReal((double)sq_sum); } else return Rf_ScalarReal(NA_REAL); } else { long double sum = 0, sq_sum = 0; for(int i = 0; i != l; ++i) { if(std::isnan(x[i])) { return Rf_ScalarReal(x[i]); } else { sum += x[i]; sq_sum += pow(x[i],2); } } sq_sum = (sq_sum - pow(sum/l,2)*l)/(l-1); if(sd) sq_sum = sqrt(sq_sum); if(std::isnan(sq_sum)) sq_sum = NA_REAL; return Rf_ScalarReal((double)sq_sum); } } else { // with groups if(g.size() != l) stop("length(g) must match nrow(X)"); if(narm) { NumericVector sq_sum(ng, NA_REAL), sum(ng); // better for valgrind // NumericVector sum = no_init_vector(ng); IntegerVector n(ng, 1); for(int i = l; i--; ) { if(std::isnan(x[i])) continue; if(std::isnan(sq_sum[g[i]-1])) { sum[g[i]-1] = x[i]; sq_sum[g[i]-1] = pow(x[i],2); } else { sum[g[i]-1] += x[i]; sq_sum[g[i]-1] += pow(x[i],2); ++n[g[i]-1]; } } if(sd) { for(int i = ng; i--; ) { if(std::isnan(sq_sum[i])) continue; sq_sum[i] = sqrt((sq_sum[i] - pow(sum[i]/n[i],2)*n[i])/(n[i]-1)); if(std::isnan(sq_sum[i])) sq_sum[i] = NA_REAL; } } else { for(int i = ng; i--; ) { if(std::isnan(sq_sum[i])) continue; sq_sum[i] = (sq_sum[i] - pow(sum[i]/n[i],2)*n[i])/(n[i]-1); if(std::isnan(sq_sum[i])) sq_sum[i] = NA_REAL; } } if(!Rf_isObject(x)) Rf_copyMostAttrib(x, sq_sum); return sq_sum; } else { NumericVector sq_sum(ng), sum(ng); // IntegerVector gsv = no_init_vector(ng); // no problem but this is better for valgrind IntegerVector gsv = (Rf_isNull(gs)) ? IntegerVector(ng) : as(gs); int ngs = 0; if(Rf_isNull(gs)) { // gsv = IntegerVector(ng); // std::fill(gsv.begin(), gsv.end(), 0); for(int i = 0; i != l; ++i) { if(std::isnan(x[i])) { if(std::isnan(sq_sum[g[i]-1])) continue; sq_sum[g[i]-1] = NA_REAL; ++ngs; if(ngs == ng) break; } else { sum[g[i]-1] += x[i]; sq_sum[g[i]-1] += pow(x[i],2); ++gsv[g[i]-1]; } } } else { // gsv = gs; if(gsv.size() != ng) stop("Vector of group-sizes must match number of groups"); for(int i = 0; i != l; ++i) { if(std::isnan(x[i])) { if(std::isnan(sq_sum[g[i]-1])) continue; sq_sum[g[i]-1] = NA_REAL; ++ngs; if(ngs == ng) break; } else { sum[g[i]-1] += x[i]; sq_sum[g[i]-1] += pow(x[i],2); } } } if(sd) { for(int i = ng; i--; ) { if(std::isnan(sq_sum[i])) continue; sq_sum[i] = sqrt((sq_sum[i] - pow(sum[i]/gsv[i],2)*gsv[i])/(gsv[i]-1)); if(std::isnan(sq_sum[i])) sq_sum[i] = NA_REAL; } } else { for(int i = ng; i--; ) { if(std::isnan(sq_sum[i])) continue; sq_sum[i] = (sq_sum[i] - pow(sum[i]/gsv[i],2)*gsv[i])/(gsv[i]-1); if(std::isnan(sq_sum[i])) sq_sum[i] = NA_REAL; } } if(!Rf_isObject(x)) Rf_copyMostAttrib(x, sq_sum); return sq_sum; } } } else { // With weights NumericVector wg = w; if(l != wg.size()) stop("length(w) must match length(x)"); if(ng == 0) { if(narm) { int j = l-1; while((std::isnan(x[j]) || std::isnan(wg[j])) && j!=0) --j; long double sumw = wg[j], sum = x[j]*sumw, sq_sum = sum*x[j]; if(j != 0) { for(int i = j; i--; ) { if(std::isnan(x[i]) || std::isnan(wg[i])) continue; sum += x[i]*wg[i]; sumw += wg[i]; sq_sum += pow(x[i],2)*wg[i]; } sq_sum = (sq_sum - pow(sum/sumw,2)*sumw)/(sumw-1); if(sd) sq_sum = sqrt(sq_sum); if(std::isnan(sq_sum)) sq_sum = NA_REAL; return Rf_ScalarReal((double)sq_sum); } else return Rf_ScalarReal(NA_REAL); } else { long double sum = 0, sumw = 0, sq_sum = 0; for(int i = 0; i != l; ++i) { if(std::isnan(x[i]) || std::isnan(wg[i])) { return Rf_ScalarReal(NA_REAL); } else { sum += x[i]*wg[i]; sumw += wg[i]; sq_sum += pow(x[i],2)*wg[i]; } } sq_sum = (sq_sum - pow(sum/sumw,2)*sumw)/(sumw-1); if(sd) sq_sum = sqrt(sq_sum); if(std::isnan(sq_sum)) sq_sum = NA_REAL; return Rf_ScalarReal((double)sq_sum); } } else { // with groups if(g.size() != l) stop("length(g) must match nrow(X)"); if(narm) { NumericVector sq_sum(ng, NA_REAL), sumw(ng), sum(ng); // better for valgrind // NumericVector sumw = no_init_vector(ng), sum = no_init_vector(ng); for(int i = l; i--; ) { if(std::isnan(x[i]) || std::isnan(wg[i])) continue; if(std::isnan(sq_sum[g[i]-1])) { sum[g[i]-1] = x[i]*wg[i]; sumw[g[i]-1] = wg[i]; sq_sum[g[i]-1] = pow(x[i],2)*wg[i]; } else { sum[g[i]-1] += x[i]*wg[i]; sumw[g[i]-1] += wg[i]; sq_sum[g[i]-1] += pow(x[i],2)*wg[i]; } } if(sd) { for(int i = ng; i--; ) { if(std::isnan(sq_sum[i])) continue; sq_sum[i] = sqrt((sq_sum[i] - pow(sum[i]/sumw[i],2)*sumw[i])/(sumw[i]-1)); if(std::isnan(sq_sum[i])) sq_sum[i] = NA_REAL; } } else { for(int i = ng; i--; ) { if(std::isnan(sq_sum[i])) continue; sq_sum[i] = (sq_sum[i] - pow(sum[i]/sumw[i],2)*sumw[i])/(sumw[i]-1); if(std::isnan(sq_sum[i])) sq_sum[i] = NA_REAL; } } if(!Rf_isObject(x)) Rf_copyMostAttrib(x, sq_sum); return sq_sum; } else { NumericVector sq_sum(ng), sumw(ng), sum(ng); int ngs = 0; for(int i = 0; i != l; ++i) { if(std::isnan(sq_sum[g[i]-1])) continue; if(std::isnan(x[i]) || std::isnan(wg[i])) { sq_sum[g[i]-1] = NA_REAL; ++ngs; if(ngs == ng) break; } else { sum[g[i]-1] += x[i]*wg[i]; sumw[g[i]-1] += wg[i]; sq_sum[g[i]-1] += pow(x[i],2)*wg[i]; } } if(sd) { for(int i = ng; i--; ) { if(std::isnan(sq_sum[i])) continue; sq_sum[i] = sqrt((sq_sum[i] - pow(sum[i]/sumw[i],2)*sumw[i])/(sumw[i]-1)); if(std::isnan(sq_sum[i])) sq_sum[i] = NA_REAL; } } else { for(int i = ng; i--; ) { if(std::isnan(sq_sum[i])) continue; sq_sum[i] = (sq_sum[i] - pow(sum[i]/sumw[i],2)*sumw[i])/(sumw[i]-1); if(std::isnan(sq_sum[i])) sq_sum[i] = NA_REAL; } } if(!Rf_isObject(x)) Rf_copyMostAttrib(x, sq_sum); return sq_sum; } } } } } // [[Rcpp::export]] SEXP fvarsdmCpp(const NumericMatrix& x, int ng = 0, const IntegerVector& g = 0, const SEXP& gs = R_NilValue, const SEXP& w = R_NilValue, bool narm = true, bool stable_algo = true, bool sd = true, bool drop = true) { int l = x.nrow(), col = x.ncol(); if(stable_algo) { // WELFORDS ONLINE METHOD ------------------------------------- if(Rf_isNull(w)) { // No weights if(ng == 0) { NumericVector out = no_init_vector(col); if(narm) { for(int j = col; j--; ) { NumericMatrix::ConstColumn column = x( _ , j); int k = l-1; // double ni = 0; // long double meani = 0, d1i = 0, M2i = 0; double ni = 0, meani = 0, d1i = 0, M2i = 0; while(std::isnan(column[k]) && k!=0) --k; if(k != 0) { for(int i = k+1; i--; ) { if(std::isnan(column[i])) continue; d1i = column[i]-meani; meani += d1i * (1 / ++ni); M2i += d1i*(column[i]-meani); } M2i /= ni-1; if(sd) M2i = sqrt(M2i); if(std::isnan(M2i)) M2i = NA_REAL; out[j] = M2i; // (double)M2i; } else out[j] = NA_REAL; } } else { for(int j = col; j--; ) { NumericMatrix::ConstColumn column = x( _ , j); // double ni = 0; // long double meani = 0, d1i = 0, M2i = 0; double ni = 0, meani = 0, d1i = 0, M2i = 0; for(int i = 0; i != l; ++i) { if(std::isnan(column[i])) { M2i = NA_REAL; break; } else { d1i = column[i]-meani; meani += d1i * (1 / ++ni); M2i += d1i*(column[i]-meani); } } M2i /= l-1; if(sd) M2i = sqrt(M2i); if(std::isnan(M2i)) M2i = NA_REAL; out[j] = M2i; // (double)M2i; } } if(drop) Rf_setAttrib(out, R_NamesSymbol, colnames(x)); else { Rf_dimgets(out, Dimension(1, col)); colnames(out) = colnames(x); if(!Rf_isObject(x)) Rf_copyMostAttrib(x, out); } return out; } else { // with groups if(g.size() != l) stop("length(g) must match nrow(X)"); if(narm) { NumericMatrix M2 = no_init_matrix(ng, col); std::fill(M2.begin(), M2.end(), NA_REAL); NumericVector meanj(ng), nj(ng); // better for valgrind for(int j = col; j--; ) { NumericMatrix::ConstColumn column = x( _ , j); NumericMatrix::Column M2j = M2( _ , j); double d1j = 0; // , meanj[ng], nj[ng]; // NumericVector meanj = no_init_vector(ng), nj = no_init_vector(ng); // better for valgrind for(int i = l; i--; ) { if(std::isnan(column[i])) continue; if(std::isnan(M2j[g[i]-1])) { meanj[g[i]-1] = column[i]; M2j[g[i]-1] = 0; nj[g[i]-1] = 1; } else { d1j = column[i]-meanj[g[i]-1]; meanj[g[i]-1] += d1j * (1 / ++nj[g[i]-1]); M2j[g[i]-1] += d1j*(column[i]-meanj[g[i]-1]); } } if(sd) { for(int i = ng; i--; ) { if(std::isnan(M2j[i])) M2j[i] = NA_REAL; else { M2j[i] = sqrt(M2j[i]/(nj[i]-1)); if(std::isnan(M2j[i])) M2j[i] = NA_REAL; } } } else { for(int i = ng; i--; ) { if(std::isnan(M2j[i])) M2j[i] = NA_REAL; else { M2j[i] /= nj[i]-1; if(std::isnan(M2j[i])) M2j[i] = NA_REAL; } } } } colnames(M2) = colnames(x); if(!Rf_isObject(x)) Rf_copyMostAttrib(x, M2); return M2; } else { NumericMatrix M2(ng, col); for(int j = col; j--; ) { NumericMatrix::ConstColumn column = x( _ , j); NumericMatrix::Column M2j = M2( _ , j); std::vector meanj(ng), nj(ng); double d1j = 0; int ngs = 0; for(int i = 0; i != l; ++i) { if(std::isnan(M2j[g[i]-1])) continue; if(std::isnan(column[i])) { M2j[g[i]-1] = NA_REAL; ++ngs; if(ngs == ng) break; } else { d1j = column[i]-meanj[g[i]-1]; meanj[g[i]-1] += d1j * (1 / ++nj[g[i]-1]); M2j[g[i]-1] += d1j*(column[i]-meanj[g[i]-1]); } } if(sd) { for(int i = ng; i--; ) { if(std::isnan(M2j[i])) M2j[i] = NA_REAL; else { M2j[i] = sqrt(M2j[i]/(nj[i]-1)); if(std::isnan(M2j[i])) M2j[i] = NA_REAL; } } } else { for(int i = ng; i--; ) { if(std::isnan(M2j[i])) M2j[i] = NA_REAL; else { M2j[i] /= nj[i]-1; if(std::isnan(M2j[i])) M2j[i] = NA_REAL; } } } } colnames(M2) = colnames(x); if(!Rf_isObject(x)) Rf_copyMostAttrib(x, M2); return M2; } } } else { // With weights NumericVector wg = w; if(l != wg.size()) stop("length(w) must match nrow(X)"); if(ng == 0) { NumericVector out = no_init_vector(col); if(narm) { for(int j = col; j--; ) { NumericMatrix::ConstColumn column = x( _ , j); int k = l-1; // long double sumwi = 0, meani = 0, M2i = 0, d1i = 0; double sumwi = 0, meani = 0, M2i = 0, d1i = 0; while((std::isnan(column[k]) || std::isnan(wg[k]) || wg[k] == 0) && k!=0) --k; if(k != 0) { for(int i = k+1; i--; ) { if(std::isnan(column[i]) || std::isnan(wg[i]) || wg[i] == 0) continue; sumwi += wg[i]; d1i = column[i] - meani; meani += d1i * (wg[i] / sumwi); M2i += wg[i] * d1i * (column[i] - meani); } M2i /= sumwi-1; if(sd) M2i = sqrt(M2i); if(std::isnan(M2i)) M2i = NA_REAL; out[j] = M2i; // (double)M2i; } else out[j] = NA_REAL; } } else { for(int j = col; j--; ) { NumericMatrix::ConstColumn column = x( _ , j); // long double sumwi = 0, meani = 0, M2i = 0, d1i = 0; double sumwi = 0, meani = 0, M2i = 0, d1i = 0; for(int i = 0; i != l; ++i) { if(std::isnan(column[i]) || std::isnan(wg[i])) { M2i = NA_REAL; break; } else { if(wg[i] == 0) continue; sumwi += wg[i]; d1i = column[i] - meani; meani += d1i * (wg[i] / sumwi); M2i += wg[i] * d1i * (column[i] - meani); } } M2i /= sumwi-1; if(sd) M2i = sqrt(M2i); if(std::isnan(M2i)) M2i = NA_REAL; out[j] = M2i; // (double)M2i; } } if(drop) Rf_setAttrib(out, R_NamesSymbol, colnames(x)); else { Rf_dimgets(out, Dimension(1, col)); colnames(out) = colnames(x); if(!Rf_isObject(x)) Rf_copyMostAttrib(x, out); } return out; } else { // with groups and weights if(g.size() != l) stop("length(g) must match nrow(X)"); if(narm) { NumericMatrix M2 = no_init_matrix(ng, col); std::fill(M2.begin(), M2.end(), NA_REAL); NumericVector meanj(ng), sumwj(ng); // better for valgrind for(int j = col; j--; ) { NumericMatrix::ConstColumn column = x( _ , j); NumericMatrix::Column M2j = M2( _ , j); double d1j = 0; // meanj[ng], sumwj[ng]; // NumericVector meanj = no_init_vector(ng), sumwj = no_init_vector(ng); better for valgrind for(int i = l; i--; ) { if(std::isnan(column[i]) || std::isnan(wg[i]) || wg[i] == 0) continue; if(std::isnan(M2j[g[i]-1])) { sumwj[g[i]-1] = wg[i]; meanj[g[i]-1] = column[i]; M2j[g[i]-1] = 0; } else { sumwj[g[i]-1] += wg[i]; d1j = column[i] - meanj[g[i]-1]; meanj[g[i]-1] += d1j * (wg[i] / sumwj[g[i]-1]); M2j[g[i]-1] += wg[i] * d1j * (column[i] - meanj[g[i]-1]); } } if(sd) { for(int i = ng; i--; ) { if(std::isnan(M2j[i])) M2j[i] = NA_REAL; else { M2j[i] = sqrt(M2j[i]/(sumwj[i]-1)); if(std::isnan(M2j[i])) M2j[i] = NA_REAL; } } } else { for(int i = ng; i--; ) { if(std::isnan(M2j[i])) M2j[i] = NA_REAL; else { M2j[i] /= sumwj[i]-1; if(std::isnan(M2j[i])) M2j[i] = NA_REAL; } } } } colnames(M2) = colnames(x); if(!Rf_isObject(x)) Rf_copyMostAttrib(x, M2); return M2; } else { NumericMatrix M2(ng, col); for(int j = col; j--; ) { NumericMatrix::ConstColumn column = x( _ , j); NumericMatrix::Column M2j = M2( _ , j); std::vector meanj(ng), sumwj(ng); double d1j = 0; int ngs = 0; for(int i = 0; i != l; ++i) { if(std::isnan(M2j[g[i]-1])) continue; if(std::isnan(column[i]) || std::isnan(wg[i])) { M2j[g[i]-1] = NA_REAL; ++ngs; if(ngs == ng) break; } else { if(wg[i] == 0) continue; sumwj[g[i]-1] += wg[i]; d1j = column[i] - meanj[g[i]-1]; meanj[g[i]-1] += d1j * (wg[i] / sumwj[g[i]-1]); M2j[g[i]-1] += wg[i] * d1j * (column[i] - meanj[g[i]-1]); } } if(sd) { for(int i = ng; i--; ) { if(std::isnan(M2j[i])) M2j[i] = NA_REAL; else { M2j[i] = sqrt(M2j[i]/(sumwj[i]-1)); if(std::isnan(M2j[i])) M2j[i] = NA_REAL; } } } else { for(int i = ng; i--; ) { if(std::isnan(M2j[i])) M2j[i] = NA_REAL; else { M2j[i] /= sumwj[i]-1; if(std::isnan(M2j[i])) M2j[i] = NA_REAL; } } } } colnames(M2) = colnames(x); if(!Rf_isObject(x)) Rf_copyMostAttrib(x, M2); return M2; } } } } else { // ONE-PASS METHOD ------------------------------------- if(Rf_isNull(w)) { // No weights if(ng == 0) { NumericVector out = no_init_vector(col); if(narm) { for(int j = col; j--; ) { NumericMatrix::ConstColumn column = x( _ , j); int k = l-1, nj = 1; long double sumj = column[k], sq_sumj = 0; while(std::isnan(sumj) && k!=0) sumj = column[--k]; sq_sumj = sumj*sumj; if(k != 0) { for(int i = k; i--; ) { if(std::isnan(column[i])) continue; sumj += column[i]; sq_sumj += pow(column[i],2); ++nj; } sq_sumj = (sq_sumj-pow(sumj/nj,2)*nj)/(nj-1); if(sd) sq_sumj = sqrt(sq_sumj); if(std::isnan(sq_sumj)) sq_sumj = NA_REAL; out[j] = (double)sq_sumj; } else out[j] = NA_REAL; } } else { for(int j = col; j--; ) { NumericMatrix::ConstColumn column = x( _ , j); long double sumj = 0, sq_sumj = 0; for(int i = 0; i != l; ++i) { if(std::isnan(column[i])) { sq_sumj = NA_REAL; break; } else { sumj += column[i]; sq_sumj += pow(column[i],2); } } if(!std::isnan(sq_sumj)) { sq_sumj = (sq_sumj-pow(sumj/l,2)*l)/(l-1); if(sd) sq_sumj = sqrt(sq_sumj); if(std::isnan(sq_sumj)) sq_sumj = NA_REAL; } out[j] = (double)sq_sumj; } } if(drop) Rf_setAttrib(out, R_NamesSymbol, colnames(x)); else { Rf_dimgets(out, Dimension(1, col)); colnames(out) = colnames(x); if(!Rf_isObject(x)) Rf_copyMostAttrib(x, out); } return out; } else { // with groups if(g.size() != l) stop("length(g) must match nrow(X)"); if(narm) { NumericMatrix sq_sum = no_init_matrix(ng, col); std::fill(sq_sum.begin(), sq_sum.end(), NA_REAL); // better for valgrind (although no error) NumericVector sumj(ng); // = no_init_vector(ng); // double sumj[ng]; IntegerVector nj(ng); // = no_init_vector(ng); // int nj[ng]; for(int j = col; j--; ) { NumericMatrix::ConstColumn column = x( _ , j); NumericMatrix::Column sq_sumj = sq_sum( _ , j); for(int i = l; i--; ) { if(std::isnan(column[i])) continue; if(std::isnan(sq_sumj[g[i]-1])) { sumj[g[i]-1] = column[i]; sq_sumj[g[i]-1] = pow(column[i],2); nj[g[i]-1] = 1; } else { sumj[g[i]-1] += column[i]; sq_sumj[g[i]-1] += pow(column[i],2); ++nj[g[i]-1]; } } for(int i = ng; i--; ) { if(std::isnan(sq_sumj[i])) continue; sq_sumj[i] = (sq_sumj[i] - pow(sumj[i]/nj[i],2)*nj[i])/(nj[i]-1); if(sd) sq_sumj[i] = sqrt(sq_sumj[i]); if(std::isnan(sq_sumj[i])) sq_sumj[i] = NA_REAL; } } colnames(sq_sum) = colnames(x); if(!Rf_isObject(x)) Rf_copyMostAttrib(x, sq_sum); return sq_sum; } else { NumericMatrix sq_sum(ng, col); if(Rf_isNull(gs)) { // int gsv[ng], memsize = sizeof(int)*ng; for(int j = col; j--; ) { NumericMatrix::ConstColumn column = x( _ , j); NumericMatrix::Column sq_sumj = sq_sum( _ , j); std::vector gsv(ng); // memset(gsv, 0, memsize); std::vector sumj(ng); int ngs = 0; for(int i = 0; i != l; ++i) { if(std::isnan(column[i])) { if(std::isnan(sq_sumj[g[i]-1])) continue; sq_sumj[g[i]-1] = column[i]; ++ngs; if(ngs == ng) break; } else { sumj[g[i]-1] += column[i]; sq_sumj[g[i]-1] += pow(column[i],2); ++gsv[g[i]-1]; } } for(int i = ng; i--; ) { if(std::isnan(sq_sumj[i])) continue; sq_sumj[i] = (sq_sumj[i] - pow(sumj[i]/gsv[i],2)*gsv[i])/(gsv[i]-1); if(sd) sq_sumj[i] = sqrt(sq_sumj[i]); if(std::isnan(sq_sumj[i])) sq_sumj[i] = NA_REAL; } } } else { IntegerVector gsv = gs; if(gsv.size() != ng) stop("Vector of group-sizes must match number of groups"); for(int j = col; j--; ) { NumericMatrix::ConstColumn column = x( _ , j); NumericMatrix::Column sq_sumj = sq_sum( _ , j); std::vector sumj(ng); int ngs = 0; for(int i = 0; i != l; ++i) { if(std::isnan(column[i])) { if(std::isnan(sq_sumj[g[i]-1])) continue; sq_sumj[g[i]-1] = column[i]; ++ngs; if(ngs == ng) break; } else { sumj[g[i]-1] += column[i]; sq_sumj[g[i]-1] += pow(column[i],2); } } for(int i = ng; i--; ) { if(std::isnan(sq_sumj[i])) continue; sq_sumj[i] = (sq_sumj[i] - pow(sumj[i]/gsv[i],2)*gsv[i])/(gsv[i]-1); if(sd) sq_sumj[i] = sqrt(sq_sumj[i]); if(std::isnan(sq_sumj[i])) sq_sumj[i] = NA_REAL; } } } colnames(sq_sum) = colnames(x); if(!Rf_isObject(x)) Rf_copyMostAttrib(x, sq_sum); return sq_sum; } } } else { // With weights NumericVector wg = w; if(l != wg.size()) stop("length(w) must match nrow(X)"); if(ng == 0) { NumericVector out = no_init_vector(col); if(narm) { for(int j = col; j--; ) { NumericMatrix::ConstColumn column = x( _ , j); int k = l-1; while((std::isnan(column[k]) || std::isnan(wg[k])) && k!=0) --k; long double sumwj = wg[k], sumj = column[k]*sumwj, sq_sumj = column[k]*sumj; if(k != 0) { for(int i = k; i--; ) { if(std::isnan(column[i]) || std::isnan(wg[i])) continue; sumj += column[i]*wg[i]; sumwj += wg[i]; sq_sumj += pow(column[i],2)*wg[i]; } sq_sumj = (sq_sumj - pow(sumj/sumwj,2)*sumwj)/(sumwj-1); if(sd) sq_sumj = sqrt(sq_sumj); if(std::isnan(sq_sumj)) sq_sumj = NA_REAL; out[j] = (double)sq_sumj; } else out[j] = NA_REAL; } } else { for(int j = col; j--; ) { NumericMatrix::ConstColumn column = x( _ , j); long double sumj = 0, sumwj = 0, sq_sumj = 0; for(int i = 0; i != l; ++i) { if(std::isnan(column[i]) || std::isnan(wg[i])) { sq_sumj = NA_REAL; break; } else { sumj += column[i]*wg[i]; sumwj += wg[i]; sq_sumj += pow(column[i],2)*wg[i]; } } if(!std::isnan(sq_sumj)) { sq_sumj = (sq_sumj - pow(sumj/sumwj,2)*sumwj)/(sumwj-1); if(sd) sq_sumj = sqrt(sq_sumj); if(std::isnan(sq_sumj)) sq_sumj = NA_REAL; } out[j] = (double)sq_sumj; } } if(drop) Rf_setAttrib(out, R_NamesSymbol, colnames(x)); else { Rf_dimgets(out, Dimension(1, col)); colnames(out) = colnames(x); if(!Rf_isObject(x)) Rf_copyMostAttrib(x, out); } return out; } else { // with groups and weights if(g.size() != l) stop("length(g) must match nrow(X)"); if(narm) { NumericMatrix sq_sum = no_init_matrix(ng, col); std::fill(sq_sum.begin(), sq_sum.end(), NA_REAL); // better for valgrind (although no error) NumericVector sumj(ng), sumwj(ng); // double sumj[ng], sumwj[ng]; for(int j = col; j--; ) { NumericMatrix::ConstColumn column = x( _ , j); NumericMatrix::Column sq_sumj = sq_sum( _ , j); // NumericVector sumj = no_init_vector(ng), sumwj = no_init_vector(ng); // double sumj[ng], sumwj[ng]; for(int i = l; i--; ) { if(std::isnan(column[i]) || std::isnan(wg[i])) continue; if(std::isnan(sq_sumj[g[i]-1])) { sumj[g[i]-1] = column[i]*wg[i]; sumwj[g[i]-1] = wg[i]; sq_sumj[g[i]-1] = pow(column[i],2)*wg[i]; } else { sumj[g[i]-1] += column[i]*wg[i]; sumwj[g[i]-1] += wg[i]; sq_sumj[g[i]-1] += pow(column[i],2)*wg[i]; } } for(int i = ng; i--; ) { if(std::isnan(sq_sumj[i])) continue; sq_sumj[i] = (sq_sumj[i] - pow(sumj[i]/sumwj[i],2)*sumwj[i])/(sumwj[i]-1); if(sd) sq_sumj[i] = sqrt(sq_sumj[i]); if(std::isnan(sq_sumj[i])) sq_sumj[i] = NA_REAL; } } colnames(sq_sum) = colnames(x); if(!Rf_isObject(x)) Rf_copyMostAttrib(x, sq_sum); return sq_sum; } else { NumericMatrix sq_sum(ng, col); for(int j = col; j--; ) { NumericMatrix::ConstColumn column = x( _ , j); NumericMatrix::Column sq_sumj = sq_sum( _ , j); std::vector sumj(ng), sumwj(ng); int ngs = 0; for(int i = 0; i != l; ++i) { if(std::isnan(sq_sumj[g[i]-1])) continue; if(std::isnan(column[i]) || std::isnan(wg[i])) { sq_sumj[g[i]-1] = NA_REAL; ++ngs; if(ngs == ng) break; } else { sumj[g[i]-1] += column[i]*wg[i]; sumwj[g[i]-1] += wg[i]; sq_sumj[g[i]-1] += pow(column[i],2)*wg[i]; } } for(int i = ng; i--; ) { if(std::isnan(sq_sumj[i])) continue; sq_sumj[i] = (sq_sumj[i] - pow(sumj[i]/sumwj[i],2)*sumwj[i])/(sumwj[i]-1); if(sd) sq_sumj[i] = sqrt(sq_sumj[i]); if(std::isnan(sq_sumj[i])) sq_sumj[i] = NA_REAL; } } colnames(sq_sum) = colnames(x); if(!Rf_isObject(x)) Rf_copyMostAttrib(x, sq_sum); return sq_sum; } } } } } // [[Rcpp::export]] SEXP fvarsdlCpp(const List& x, int ng = 0, const IntegerVector& g = 0, const SEXP& gs = R_NilValue, const SEXP& w = R_NilValue, bool narm = true, bool stable_algo = true, bool sd = true, bool drop = true) { int l = x.size(); if(stable_algo) { // WELFORDS ONLINE METHOD ------------------------------------- if(Rf_isNull(w)) { // No weights if(ng == 0) { NumericVector out(l); if(narm) { for(int j = l; j--; ) { NumericVector column = x[j]; int k = column.size()-1; // double ni = 0; // long double meani = 0, d1i = 0, M2i = 0; double ni = 0, meani = 0, d1i = 0, M2i = 0; while(std::isnan(column[k]) && k!=0) --k; if(k != 0) { for(int i = k+1; i--; ) { if(std::isnan(column[i])) continue; d1i = column[i]-meani; meani += d1i * (1 / ++ni); M2i += d1i*(column[i]-meani); } M2i /= ni-1; if(sd) M2i = sqrt(M2i); if(std::isnan(M2i)) M2i = NA_REAL; out[j] = M2i; // (double)M2i; } else out[j] = NA_REAL; } } else { for(int j = l; j--; ) { NumericVector column = x[j]; int row = column.size(); // double ni = 0; // long double meani = 0, d1i = 0, M2i = 0; double ni = 0, meani = 0, d1i = 0, M2i = 0; for(int i = 0; i != row; ++i) { if(std::isnan(column[i])) { M2i = NA_REAL; break; } else { d1i = column[i]-meani; meani += d1i * (1 / ++ni); M2i += d1i*(column[i]-meani); } } M2i /= row-1; if(sd) M2i = sqrt(M2i); if(std::isnan(M2i)) M2i = NA_REAL; out[j] = M2i; // (double)M2i; } } if(drop) { Rf_setAttrib(out, R_NamesSymbol, Rf_getAttrib(x, R_NamesSymbol)); return out; } else { List res(l); for(int j = l; j--; ) { res[j] = out[j]; SHALLOW_DUPLICATE_ATTRIB(res[j], x[j]); } SHALLOW_DUPLICATE_ATTRIB(res, x); Rf_setAttrib(res, R_RowNamesSymbol, Rf_ScalarInteger(1)); return res; } } else { // With groups List out(l); int gss = g.size(); if(narm) { for(int j = l; j--; ) { NumericVector column = x[j]; if(gss != column.size()) stop("length(g) must match nrow(X)"); NumericVector M2j(ng, NA_REAL), nj(ng, 1.0), meanj(ng); // better for valgrind // = no_init_vector(ng); double d1j = 0; // meanj[ng] // std::vector nj(ng, 1.0); for(int i = gss; i--; ) { if(std::isnan(column[i])) continue; if(std::isnan(M2j[g[i]-1])) { meanj[g[i]-1] = column[i]; M2j[g[i]-1] = 0; } else { d1j = column[i]-meanj[g[i]-1]; meanj[g[i]-1] += d1j * (1 / ++nj[g[i]-1]); M2j[g[i]-1] += d1j*(column[i]-meanj[g[i]-1]); } } if(sd) { for(int i = ng; i--; ) { if(std::isnan(M2j[i])) M2j[i] = NA_REAL; else { M2j[i] = sqrt(M2j[i]/(nj[i]-1)); if(std::isnan(M2j[i])) M2j[i] = NA_REAL; } } } else { for(int i = ng; i--; ) { if(std::isnan(M2j[i])) M2j[i] = NA_REAL; else { M2j[i] /= nj[i]-1; if(std::isnan(M2j[i])) M2j[i] = NA_REAL; } } } SHALLOW_DUPLICATE_ATTRIB(M2j, column); out[j] = M2j; } } else { for(int j = l; j--; ) { NumericVector column = x[j]; if(gss != column.size()) stop("length(g) must match nrow(X)"); NumericVector M2j(ng); std::vector meanj(ng), nj(ng); double d1j = 0; int ngs = 0; for(int i = 0; i != gss; ++i) { if(std::isnan(M2j[g[i]-1])) continue; if(std::isnan(column[i])) { M2j[g[i]-1] = NA_REAL; ++ngs; if(ngs == ng) break; } else { d1j = column[i]-meanj[g[i]-1]; meanj[g[i]-1] += d1j * (1 / ++nj[g[i]-1]); M2j[g[i]-1] += d1j*(column[i]-meanj[g[i]-1]); } } if(sd) { for(int i = ng; i--; ) { if(std::isnan(M2j[i])) M2j[i] = NA_REAL; else { M2j[i] = sqrt(M2j[i]/(nj[i]-1)); if(std::isnan(M2j[i])) M2j[i] = NA_REAL; } } } else { for(int i = ng; i--; ) { if(std::isnan(M2j[i])) M2j[i] = NA_REAL; else { M2j[i] /= nj[i]-1; if(std::isnan(M2j[i])) M2j[i] = NA_REAL; } } } SHALLOW_DUPLICATE_ATTRIB(M2j, column); out[j] = M2j; } } SHALLOW_DUPLICATE_ATTRIB(out, x); Rf_setAttrib(out, R_RowNamesSymbol, IntegerVector::create(NA_INTEGER, -ng)); return out; } } else { // With weights NumericVector wg = w; int wgs = wg.size(); if(ng == 0) { NumericVector out(l); if(narm) { for(int j = l; j--; ) { NumericVector column = x[j]; if(column.size() != wgs) stop("length(w) must match nrow(X)"); int k = wgs-1; // long double sumwi = 0, meani = 0, M2i = 0, d1i = 0; double sumwi = 0, meani = 0, M2i = 0, d1i = 0; while((std::isnan(column[k]) || std::isnan(wg[k]) || wg[k] == 0) && k!=0) --k; if(k != 0) { for(int i = k+1; i--; ) { if(std::isnan(column[i]) || std::isnan(wg[i]) || wg[i] == 0) continue; sumwi += wg[i]; d1i = column[i] - meani; meani += d1i * (wg[i] / sumwi); M2i += wg[i] * d1i * (column[i] - meani); } M2i /= sumwi-1; if(sd) M2i = sqrt(M2i); if(std::isnan(M2i)) M2i = NA_REAL; out[j] = M2i; // (double)M2i; } else out[j] = NA_REAL; } } else { for(int j = l; j--; ) { NumericVector column = x[j]; if(column.size() != wgs) stop("length(w) must match nrow(X)"); // long double sumwi = 0, meani = 0, M2i = 0, d1i = 0; double sumwi = 0, meani = 0, M2i = 0, d1i = 0; for(int i = 0; i != wgs; ++i) { if(std::isnan(column[i]) || std::isnan(wg[i])) { M2i = NA_REAL; break; } else { if(wg[i] == 0) continue; sumwi += wg[i]; d1i = column[i] - meani; meani += d1i * (wg[i] / sumwi); M2i += wg[i] * d1i * (column[i] - meani); } } M2i /= sumwi-1; if(sd) M2i = sqrt(M2i); if(std::isnan(M2i)) M2i = NA_REAL; out[j] = M2i; // (double)M2i; } } if(drop) { Rf_setAttrib(out, R_NamesSymbol, Rf_getAttrib(x, R_NamesSymbol)); return out; } else { List res(l); for(int j = l; j--; ) { res[j] = out[j]; SHALLOW_DUPLICATE_ATTRIB(res[j], x[j]); } SHALLOW_DUPLICATE_ATTRIB(res, x); Rf_setAttrib(res, R_RowNamesSymbol, Rf_ScalarInteger(1)); return res; } } else { List out(l); int gss = g.size(); if(wgs != gss) stop("length(w) must match length(g)"); if(narm) { for(int j = l; j--; ) { NumericVector column = x[j]; if(gss != column.size()) stop("length(g) must match nrow(X)"); NumericVector M2j(ng, NA_REAL), meanj(ng), sumwj(ng); // better for valgrind //= no_init_vector(ng), sumwj = no_init_vector(ng); double d1j = 0; // , sumwj[ng], meanj[ng]; for(int i = gss; i--; ) { if(std::isnan(column[i]) || std::isnan(wg[i]) || wg[i] == 0) continue; if(std::isnan(M2j[g[i]-1])) { sumwj[g[i]-1] = wg[i]; meanj[g[i]-1] = column[i]; M2j[g[i]-1] = 0; } else { sumwj[g[i]-1] += wg[i]; d1j = column[i] - meanj[g[i]-1]; meanj[g[i]-1] += d1j * (wg[i] / sumwj[g[i]-1]); M2j[g[i]-1] += wg[i] * d1j * (column[i] - meanj[g[i]-1]); } } if(sd) { for(int i = ng; i--; ) { if(std::isnan(M2j[i])) M2j[i] = NA_REAL; else { M2j[i] = sqrt(M2j[i]/(sumwj[i]-1)); if(std::isnan(M2j[i])) M2j[i] = NA_REAL; } } } else { for(int i = ng; i--; ) { if(std::isnan(M2j[i])) M2j[i] = NA_REAL; else { M2j[i] /= sumwj[i]-1; if(std::isnan(M2j[i])) M2j[i] = NA_REAL; } } } SHALLOW_DUPLICATE_ATTRIB(M2j, column); out[j] = M2j; } } else { for(int j = l; j--; ) { NumericVector column = x[j]; if(gss != column.size()) stop("length(g) must match nrow(X)"); NumericVector M2j(ng); std::vector sumwj(ng), meanj(ng); double d1j = 0; int ngs = 0; for(int i = 0; i != gss; ++i) { if(std::isnan(M2j[g[i]-1])) continue; if(std::isnan(column[i]) || std::isnan(wg[i])) { M2j[g[i]-1] = NA_REAL; ++ngs; if(ngs == ng) break; } else { if(wg[i] == 0) continue; sumwj[g[i]-1] += wg[i]; d1j = column[i] - meanj[g[i]-1]; meanj[g[i]-1] += d1j * (wg[i] / sumwj[g[i]-1]); M2j[g[i]-1] += wg[i] * d1j * (column[i] - meanj[g[i]-1]); } } if(sd) { for(int i = ng; i--; ) { if(std::isnan(M2j[i])) M2j[i] = NA_REAL; else { M2j[i] = sqrt(M2j[i]/(sumwj[i]-1)); if(std::isnan(M2j[i])) M2j[i] = NA_REAL; } } } else { for(int i = ng; i--; ) { if(std::isnan(M2j[i])) M2j[i] = NA_REAL; else { M2j[i] /= sumwj[i]-1; if(std::isnan(M2j[i])) M2j[i] = NA_REAL; } } } SHALLOW_DUPLICATE_ATTRIB(M2j, column); out[j] = M2j; } } SHALLOW_DUPLICATE_ATTRIB(out, x); Rf_setAttrib(out, R_RowNamesSymbol, IntegerVector::create(NA_INTEGER, -ng)); return out; } } } else { // ONE-PASS METHOD ------------------------------------- if(Rf_isNull(w)) { // No weights if(ng == 0) { NumericVector out(l); if(narm) { for(int j = l; j--; ) { NumericVector column = x[j]; int k = column.size()-1, ni = 1; long double sumi = column[k], sq_sumi = 0; while(std::isnan(sumi) && k!=0) sumi = column[--k]; sq_sumi = sumi*sumi; if(k != 0) { for(int i = k; i--; ) { if(std::isnan(column[i])) continue; sumi += column[i]; sq_sumi += pow(column[i],2); ++ni; } sq_sumi = (sq_sumi-pow(sumi/ni,2)*ni)/(ni-1); if(sd) sq_sumi = sqrt(sq_sumi); if(std::isnan(sq_sumi)) sq_sumi = NA_REAL; out[j] = (double)sq_sumi; } else out[j] = NA_REAL; } } else { for(int j = l; j--; ) { NumericVector column = x[j]; long double sumi = 0, sq_sumi = 0; int row = column.size(); for(int i = 0; i != row; ++i) { if(std::isnan(column[i])) { sq_sumi = NA_REAL; break; } else { sumi += column[i]; sq_sumi += pow(column[i],2); } } if(!std::isnan(sq_sumi)) { sq_sumi = (sq_sumi - pow(sumi/row,2)*row)/(row-1); if(sd) sq_sumi = sqrt(sq_sumi); if(std::isnan(sq_sumi)) sq_sumi = NA_REAL; } out[j] = (double)sq_sumi; } } if(drop) { Rf_setAttrib(out, R_NamesSymbol, Rf_getAttrib(x, R_NamesSymbol)); return out; } else { List res(l); for(int j = l; j--; ) { res[j] = out[j]; SHALLOW_DUPLICATE_ATTRIB(res[j], x[j]); } SHALLOW_DUPLICATE_ATTRIB(res, x); Rf_setAttrib(res, R_RowNamesSymbol, Rf_ScalarInteger(1)); return res; } } else { // With groups List out(l); int gss = g.size(); if(narm) { for(int j = l; j--; ) { NumericVector column = x[j]; if(gss != column.size()) stop("length(g) must match nrow(X)"); NumericVector sq_sumj(ng, NA_REAL), sumj(ng); // better for valgrind // = no_init_vector(ng); // double sumj[ng]; std::vector nj(ng, 1); for(int i = gss; i--; ) { if(std::isnan(column[i])) continue; if(std::isnan(sq_sumj[g[i]-1])) { sumj[g[i]-1] = column[i]; sq_sumj[g[i]-1] = pow(column[i],2); } else { sumj[g[i]-1] += column[i]; sq_sumj[g[i]-1] += pow(column[i],2); ++nj[g[i]-1]; } } if(sd) { for(int i = ng; i--; ) { if(std::isnan(sq_sumj[i])) continue; sq_sumj[i] = sqrt((sq_sumj[i] - pow(sumj[i]/nj[i],2)*nj[i])/(nj[i]-1)); if(std::isnan(sq_sumj[i])) sq_sumj[i] = NA_REAL; } } else { for(int i = ng; i--; ) { if(std::isnan(sq_sumj[i])) continue; sq_sumj[i] = (sq_sumj[i] - pow(sumj[i]/nj[i],2)*nj[i])/(nj[i]-1); if(std::isnan(sq_sumj[i])) sq_sumj[i] = NA_REAL; } } SHALLOW_DUPLICATE_ATTRIB(sq_sumj, column); out[j] = sq_sumj; } } else { if(Rf_isNull(gs)) { // int gsv[ng], memsize = sizeof(int)*ng; for(int j = l; j--; ) { NumericVector column = x[j]; if(gss != column.size()) stop("length(g) must match nrow(X)"); NumericVector sq_sumj(ng), sumj(ng); std::vector gsv(ng); // memset(gsv, 0, memsize); int ngs = 0; for(int i = 0; i != gss; ++i) { if(std::isnan(column[i])) { if(std::isnan(sq_sumj[g[i]-1])) continue; sq_sumj[g[i]-1] = column[i]; ++ngs; if(ngs == ng) break; } else { sumj[g[i]-1] += column[i]; sq_sumj[g[i]-1] += pow(column[i],2); ++gsv[g[i]-1]; } } if(sd) { for(int i = ng; i--; ) { if(std::isnan(sq_sumj[i])) continue; sq_sumj[i] = sqrt((sq_sumj[i] - pow(sumj[i]/gsv[i],2)*gsv[i])/(gsv[i]-1)); if(std::isnan(sq_sumj[i])) sq_sumj[i] = NA_REAL; } } else { for(int i = ng; i--; ) { if(std::isnan(sq_sumj[i])) continue; sq_sumj[i] = (sq_sumj[i] - pow(sumj[i]/gsv[i],2)*gsv[i])/(gsv[i]-1); if(std::isnan(sq_sumj[i])) sq_sumj[i] = NA_REAL; } } SHALLOW_DUPLICATE_ATTRIB(sq_sumj, column); out[j] = sq_sumj; } } else { IntegerVector gsv = gs; if(gsv.size() != ng) stop("Vector of group-sizes must match number of groups"); for(int j = l; j--; ) { NumericVector column = x[j]; if(gss != column.size()) stop("length(g) must match nrow(X)"); NumericVector sq_sumj(ng); std::vector sumj(ng); int ngs = 0; for(int i = 0; i != gss; ++i) { if(std::isnan(column[i])) { if(std::isnan(sq_sumj[g[i]-1])) continue; sq_sumj[g[i]-1] = column[i]; ++ngs; if(ngs == ng) break; } else { sumj[g[i]-1] += column[i]; sq_sumj[g[i]-1] += pow(column[i],2); } } if(sd) { for(int i = ng; i--; ) { if(std::isnan(sq_sumj[i])) continue; sq_sumj[i] = sqrt((sq_sumj[i] - pow(sumj[i]/gsv[i],2)*gsv[i])/(gsv[i]-1)); if(std::isnan(sq_sumj[i])) sq_sumj[i] = NA_REAL; } } else { for(int i = ng; i--; ) { if(std::isnan(sq_sumj[i])) continue; sq_sumj[i] = (sq_sumj[i] - pow(sumj[i]/gsv[i],2)*gsv[i])/(gsv[i]-1); if(std::isnan(sq_sumj[i])) sq_sumj[i] = NA_REAL; } } SHALLOW_DUPLICATE_ATTRIB(sq_sumj, column); out[j] = sq_sumj; } } } SHALLOW_DUPLICATE_ATTRIB(out, x); Rf_setAttrib(out, R_RowNamesSymbol, IntegerVector::create(NA_INTEGER, -ng)); return out; } } else { // With weights NumericVector wg = w; int wgs = wg.size(); if(ng == 0) { NumericVector out(l); if(narm) { for(int j = l; j--; ) { NumericVector column = x[j]; if(column.size() != wgs) stop("length(w) must match nrow(X)"); int k = wgs-1; while((std::isnan(column[k]) || std::isnan(wg[k])) && k!=0) --k; long double sumwi = wg[k], sumi = column[k]*sumwi, sq_sumi = column[k]*sumi; if(k != 0) { for(int i = k; i--; ) { if(std::isnan(column[i]) || std::isnan(wg[i])) continue; sumi += column[i]*wg[i]; sumwi += wg[i]; sq_sumi += pow(column[i],2)*wg[i]; } sq_sumi = (sq_sumi - pow(sumi/sumwi,2)*sumwi)/(sumwi-1); if(sd) sq_sumi = sqrt(sq_sumi); if(std::isnan(sq_sumi)) sq_sumi = NA_REAL; out[j] = (double)sq_sumi; } else out[j] = NA_REAL; } } else { for(int j = l; j--; ) { NumericVector column = x[j]; if(column.size() != wgs) stop("length(w) must match nrow(X)"); long double sumi = 0, sumwi = 0, sq_sumi = 0; for(int i = 0; i != wgs; ++i) { if(std::isnan(column[i]) || std::isnan(wg[i])) { sq_sumi = NA_REAL; break; } else { sumi += column[i]*wg[i]; sumwi += wg[i]; sq_sumi += pow(column[i],2)*wg[i]; } } if(!std::isnan(sq_sumi)) { sq_sumi = (sq_sumi - pow(sumi/sumwi,2)*sumwi)/(sumwi-1); if(sd) sq_sumi = sqrt(sq_sumi); if(std::isnan(sq_sumi)) sq_sumi = NA_REAL; } out[j] = (double)sq_sumi; } } if(drop) { Rf_setAttrib(out, R_NamesSymbol, Rf_getAttrib(x, R_NamesSymbol)); return out; } else { List res(l); for(int j = l; j--; ) { res[j] = out[j]; SHALLOW_DUPLICATE_ATTRIB(res[j], x[j]); } SHALLOW_DUPLICATE_ATTRIB(res, x); Rf_setAttrib(res, R_RowNamesSymbol, Rf_ScalarInteger(1)); return res; } } else { // With groups and weights List out(l); int gss = g.size(); if(wgs != gss) stop("length(w) must match length(g)"); if(narm) { for(int j = l; j--; ) { NumericVector column = x[j]; if(gss != column.size()) stop("length(g) must match nrow(X)"); NumericVector sq_sumj(ng, NA_REAL), sumj(ng), sumwj(ng); // better for valgrind // = no_init_vector(ng), sumwj = no_init_vector(ng); // double sumj[ng], sumwj[ng]; for(int i = gss; i--; ) { if(std::isnan(column[i]) || std::isnan(wg[i])) continue; if(std::isnan(sq_sumj[g[i]-1])) { sumj[g[i]-1] = column[i]*wg[i]; sumwj[g[i]-1] = wg[i]; sq_sumj[g[i]-1] = pow(column[i],2)*wg[i]; } else { sumj[g[i]-1] += column[i]*wg[i]; sumwj[g[i]-1] += wg[i]; sq_sumj[g[i]-1] += pow(column[i],2)*wg[i]; } } if(sd) { for(int i = ng; i--; ) { if(std::isnan(sq_sumj[i])) continue; sq_sumj[i] = sqrt((sq_sumj[i] - pow(sumj[i]/sumwj[i],2)*sumwj[i])/(sumwj[i]-1)); if(std::isnan(sq_sumj[i])) sq_sumj[i] = NA_REAL; } } else { for(int i = ng; i--; ) { if(std::isnan(sq_sumj[i])) continue; sq_sumj[i] = (sq_sumj[i] - pow(sumj[i]/sumwj[i],2)*sumwj[i])/(sumwj[i]-1); if(std::isnan(sq_sumj[i])) sq_sumj[i] = NA_REAL; } } SHALLOW_DUPLICATE_ATTRIB(sq_sumj, column); out[j] = sq_sumj; } } else { for(int j = l; j--; ) { NumericVector column = x[j]; if(gss != column.size()) stop("length(g) must match nrow(X)"); NumericVector sq_sumj(ng); std::vector sumwj(ng), sumj(ng); int ngs = 0; for(int i = 0; i != gss; ++i) { if(std::isnan(sq_sumj[g[i]-1])) continue; if(std::isnan(column[i]) || std::isnan(wg[i])) { sq_sumj[g[i]-1] = NA_REAL; ++ngs; if(ngs == ng) break; } else { sumj[g[i]-1] += column[i]*wg[i]; sumwj[g[i]-1] += wg[i]; sq_sumj[g[i]-1] += pow(column[i],2)*wg[i]; } } if(sd) { for(int i = ng; i--; ) { if(std::isnan(sq_sumj[i])) continue; sq_sumj[i] = sqrt((sq_sumj[i] - pow(sumj[i]/sumwj[i],2)*sumwj[i])/(sumwj[i]-1)); if(std::isnan(sq_sumj[i])) sq_sumj[i] = NA_REAL; } } else { for(int i = ng; i--; ) { if(std::isnan(sq_sumj[i])) continue; sq_sumj[i] = (sq_sumj[i] - pow(sumj[i]/sumwj[i],2)*sumwj[i])/(sumwj[i]-1); if(std::isnan(sq_sumj[i])) sq_sumj[i] = NA_REAL; } } SHALLOW_DUPLICATE_ATTRIB(sq_sumj, column); out[j] = sq_sumj; } } SHALLOW_DUPLICATE_ATTRIB(out, x); Rf_setAttrib(out, R_RowNamesSymbol, IntegerVector::create(NA_INTEGER, -ng)); return out; } } } } collapse/src/data.table_init.c0000644000176200001440000002063414111703345016035 0ustar liggesusers#include "data.table.h" /* This code is adapted from the data.table package: http://r-datatable.com and licensed under a Mozilla Public License 2.0 (MPL-2.0) license. */ #include // #include // #include // global constants extern in data.table.h for gcc10 -fno-common; #4091 // these are written to once here on initialization, but because of that write they can't be declared const SEXP char_integer64; SEXP char_nanotime; SEXP char_factor; SEXP char_ordered; SEXP char_dataframe; SEXP char_datatable; SEXP char_sf; // not currently needed (base_radixsort uses install), but perhaps later.. SEXP sym_sorted; // SEXP sym_maxgrpn; // SEXP sym_starts; // SEXP char_starts; SEXP sym_index; SEXP sym_inherits; SEXP sym_sf_column; SEXP SelfRefSymbol; SEXP sym_datatable_locked; SEXP sym_collapse_DT_alloccol; double NA_INT64_D; long long NA_INT64_LL; Rcomplex NA_CPLX; size_t sizes[100]; // max appears to be FUNSXP = 99, see Rinternals.h size_t typeorder[100]; // -> Needed for SIZEOF macro used in rbindlist Howver TYPEORDER macro and typeof is not used... static void setSizes() { for (int i=0; i<100; ++i) { sizes[i]=0; typeorder[i]=0; } // only these types are currently allowed as column types : sizes[LGLSXP] = sizeof(int); typeorder[LGLSXP] = 0; sizes[RAWSXP] = sizeof(Rbyte); typeorder[RAWSXP] = 1; sizes[INTSXP] = sizeof(int); typeorder[INTSXP] = 2; // integer and factor sizes[REALSXP] = sizeof(double); typeorder[REALSXP] = 3; // numeric and integer64 sizes[CPLXSXP] = sizeof(Rcomplex); typeorder[CPLXSXP] = 4; sizes[STRSXP] = sizeof(SEXP *); typeorder[STRSXP] = 5; sizes[VECSXP] = sizeof(SEXP *); typeorder[VECSXP] = 6; // list column if (sizeof(char *)>8) error("Pointers are %d bytes, greater than 8. We have not tested on any architecture greater than 64bit yet.", sizeof(char *)); // One place we need the largest sizeof is the working memory malloc in reorder.c } // before it was SEXP attribute_visible SEXP collapse_init(SEXP mess) // void SEXP mess DllInfo *info // relies on pkg/src/Makevars to mv data.table.so to datatable.so { // R_registerRoutines(info, NULL, callMethods, NULL, externalMethods); // R_useDynamicSymbols(info, FALSE); setSizes(); const char *msg = "... failed. Please forward this message to maintainer('collapse')."; if ((int)NA_INTEGER != (int)INT_MIN) error("Checking NA_INTEGER [%d] == INT_MIN [%d] %s", NA_INTEGER, INT_MIN, msg); if ((int)NA_INTEGER != (int)NA_LOGICAL) error("Checking NA_INTEGER [%d] == NA_LOGICAL [%d] %s", NA_INTEGER, NA_LOGICAL, msg); if (sizeof(int) != 4) error("Checking sizeof(int) [%d] is 4 %s", sizeof(int), msg); if (sizeof(double) != 8) error("Checking sizeof(double) [%d] is 8 %s", sizeof(double), msg); // 8 on both 32bit and 64bit // alignof not available in C99: if (alignof(double) != 8) error("Checking alignof(double) [%d] is 8 %s", alignof(double), msg); // 8 on both 32bit and 64bit if (sizeof(long long) != 8) error("Checking sizeof(long long) [%d] is 8 %s", sizeof(long long), msg); if (sizeof(char *) != 4 && sizeof(char *) != 8) error("Checking sizeof(pointer) [%d] is 4 or 8 %s", sizeof(char *), msg); if (sizeof(SEXP) != sizeof(char *)) error("Checking sizeof(SEXP) [%d] == sizeof(pointer) [%d] %s", sizeof(SEXP), sizeof(char *), msg); if (sizeof(uint64_t) != 8) error("Checking sizeof(uint64_t) [%d] is 8 %s", sizeof(uint64_t), msg); if (sizeof(int64_t) != 8) error("Checking sizeof(int64_t) [%d] is 8 %s", sizeof(int64_t), msg); if (sizeof(signed char) != 1) error("Checking sizeof(signed char) [%d] is 1 %s", sizeof(signed char), msg); if (sizeof(int8_t) != 1) error("Checking sizeof(int8_t) [%d] is 1 %s", sizeof(int8_t), msg); if (sizeof(uint8_t) != 1) error("Checking sizeof(uint8_t) [%d] is 1 %s", sizeof(uint8_t), msg); if (sizeof(int16_t) != 2) error("Checking sizeof(int16_t) [%d] is 2 %s", sizeof(int16_t), msg); if (sizeof(uint16_t) != 2) error("Checking sizeof(uint16_t) [%d] is 2 %s", sizeof(uint16_t), msg); SEXP tmp = PROTECT(allocVector(INTSXP,2)); if (LENGTH(tmp)!=2) error("Checking LENGTH(allocVector(INTSXP,2)) [%d] is 2 %s", LENGTH(tmp), msg); if (TRUELENGTH(tmp)!=0) error("Checking TRUELENGTH(allocVector(INTSXP,2)) [%d] is 0 %s", TRUELENGTH(tmp), msg); UNPROTECT(1); // According to IEEE (http://en.wikipedia.org/wiki/IEEE_754-1985#Zero) we can rely on 0.0 being all 0 bits. // But check here anyway just to be sure, just in case this answer is right (http://stackoverflow.com/a/2952680/403310). int i = 314; memset(&i, 0, sizeof(int)); if (i != 0) error("Checking memset(&i,0,sizeof(int)); i == (int)0 %s", msg); unsigned int ui = 314; memset(&ui, 0, sizeof(unsigned int)); if (ui != 0) error("Checking memset(&ui, 0, sizeof(unsigned int)); ui == (unsigned int)0 %s", msg); double d = 3.14; memset(&d, 0, sizeof(double)); if (d != 0.0) error("Checking memset(&d, 0, sizeof(double)); d == (double)0.0 %s", msg); long double ld = 3.14; memset(&ld, 0, sizeof(long double)); if (ld != 0.0) error("Checking memset(&ld, 0, sizeof(long double)); ld == (long double)0.0 %s", msg); // Variables rather than #define for NA_INT64 to ensure correct usage; i.e. not casted NA_INT64_LL = LLONG_MIN; NA_INT64_D = LLtoD(NA_INT64_LL); if (NA_INT64_LL != DtoLL(NA_INT64_D)) error("Conversion of NA_INT64 via double failed %lld!=%lld", NA_INT64_LL, DtoLL(NA_INT64_D)); // LLONG_MIN when punned to double is the sign bit set and then all zeros in exponent and significand i.e. -0.0 // That's why we must never test for NA_INT64_D using == in double type. Must always DtoLL and compare long long types. // Assigning NA_INT64_D to a REAL is ok however. if (NA_INT64_D != 0.0) error("NA_INT64_D (negative -0.0) is not == 0.0."); if (NA_INT64_D != -0.0) error("NA_INT64_D (negative -0.0) is not ==-0.0."); if (ISNAN(NA_INT64_D)) error("ISNAN(NA_INT64_D) is TRUE but should not be"); if (isnan(NA_INT64_D)) error("isnan(NA_INT64_D) is TRUE but should not be"); NA_CPLX.r = NA_REAL; // NA_REAL is defined as R_NaReal which is not a strict constant and thus initializer {NA_REAL, NA_REAL} can't be used in .h NA_CPLX.i = NA_REAL; // https://github.com/Rdatatable/data.table/pull/3689/files#r304117234 // create needed strings in advance for speed, same techique as R_*Symbol // Following R-exts 5.9.4; paragraph and example starting "Using install ..." // either use PRINTNAME(install()) or R_PreserveObject(mkChar()) here. char_integer64 = PRINTNAME(install("integer64")); char_nanotime = PRINTNAME(install("nanotime")); // char_starts = PRINTNAME(sym_starts = install("starts")); char_factor = PRINTNAME(install("factor")); char_ordered = PRINTNAME(install("ordered")); char_dataframe = PRINTNAME(install("data.frame")); char_datatable = PRINTNAME(install("data.table")); char_sf = PRINTNAME(install("sf")); if (TYPEOF(char_integer64) != CHARSXP) { // checking one is enough in case of any R-devel changes error("PRINTNAME(install(\"integer64\")) has returned %s not %s", type2char(TYPEOF(char_integer64)), type2char(CHARSXP)); // # nocov } // create commonly used symbols, same as R_*Symbol but internal to DT // Not really for speed but to avoid leak in situations like setAttrib(DT, install(), allocVector()) where // the allocVector() can happen first and then the install() could gc and free it before it is protected // within setAttrib. Thanks to Bill Dunlap finding and reporting. Using these symbols instead of install() // avoids the gc without needing an extra PROTECT and immediate UNPROTECT after the setAttrib which would // look odd (and devs in future might be tempted to remove them). Avoiding passing install() to API calls // keeps the code neat and readable. Also see grep's added to CRAN_Release.cmd to find such calls. // not currently needed (base_radixsort uses install), but perhaps later.. sym_sorted = install("sorted"); // sym_maxgrpn = install("maxgrpn"); sym_index = install("index"); sym_inherits = install("inherits"); sym_sf_column = install("sf_column"); SelfRefSymbol = install(".internal.selfref"); sym_datatable_locked = install(".data.table.locked"); sym_collapse_DT_alloccol = install("collapse_DT_alloccol"); return mess; } inline long long DtoLL(double x) { union {double d; int64_t i64;} u; u.d = x; return (long long)u.i64; } inline double LLtoD(long long x) { union {double d; int64_t i64;} u; u.i64 = (int64_t)x; return u.d; } collapse/src/varying.cpp0000644000176200001440000001223414174223734015037 0ustar liggesusers// [[Rcpp::plugins(cpp11)]] #include using namespace Rcpp; template LogicalVector varyingCppImpl(Vector x, int ng, IntegerVector g, bool any_group) { int l = x.size(); if(l < 2) return Rf_ScalarLogical(false); // Prevents seqfault for numeric(0) #101 typedef typename Rcpp::traits::storage_type::type storage_t; auto isnanT = (RTYPE == REALSXP) ? [](storage_t x) { return x != x; } : [](storage_t x) { return x == Vector::get_na(); }; if(ng == 0) { // Note: Does not return NA if all NA... can be checked with fnobs ... int j = l-1; storage_t vi = x[j]; while(isnanT(vi) && j!=0) vi = x[--j]; if(j != 0) for(int i = j; i--; ) if(!isnanT(x[i]) && x[i] != vi) return Rf_ScalarLogical(true); return Rf_ScalarLogical(false); } else { // with groups if(g.size() != l) stop("length(g) must match length(x)"); Vector valg(ng, Vector::get_na()); if(any_group) { for(int i = 0; i != l; ++i) { if(isnanT(x[i])) continue; if(isnanT(valg[g[i]-1])) { valg[g[i]-1] = x[i]; } else { if(x[i] != valg[g[i]-1]) return Rf_ScalarLogical(true); } } return Rf_ScalarLogical(false); } else { LogicalVector varyg(ng, NA_LOGICAL); int *pvaryg = LOGICAL(varyg), gi; // seems to bring a tiny gain.. for(int i = 0; i != l; ++i) { if(isnanT(x[i])) continue; gi = g[i]-1; // slightly faster if(isnanT(valg[gi])) { valg[gi] = x[i]; pvaryg[gi] = false; } else { if(!pvaryg[gi] && x[i] != valg[gi]) { pvaryg[gi] = true; // ++ngs; // Omitting this is faster for most datasets -> most are ordered ! (i.e. PRIO Grid 1.27 vs. 1.14 seconds) // if(ngs == ng) break; } } } // Rf_setAttrib(varyg, R_NamesSymbol, R_NilValue); return varyg; } } } template <> LogicalVector varyingCppImpl(Vector x, int ng, IntegerVector g, bool any_group) { stop("Not supported SEXP type!"); } template <> LogicalVector varyingCppImpl(Vector x, int ng, IntegerVector g, bool any_group) { stop("Not supported SEXP type!"); } template <> LogicalVector varyingCppImpl(Vector x, int ng, IntegerVector g, bool any_group) { stop("Not supported SEXP type!"); } template <> LogicalVector varyingCppImpl(Vector x, int ng, IntegerVector g, bool any_group) { stop("Not supported SEXP type!"); } // [[Rcpp::export]] LogicalVector varyingCpp(const SEXP& x, int ng = 0, const IntegerVector& g = 0, bool any_group = true){ RCPP_RETURN_VECTOR(varyingCppImpl, x, ng, g, any_group); } template SEXP varyingmCppImpl(Matrix x, int ng, IntegerVector g, bool any_group, bool drop) { int col = x.ncol(); LogicalMatrix out = (ng == 0 || any_group) ? no_init_matrix(1, col) : no_init_matrix(ng, col); for(int j = col; j--; ) out(_, j) = varyingCppImpl(x(_, j), ng, g, any_group); if(drop && any_group) { Rf_setAttrib(out, R_DimSymbol, R_NilValue); // Rf_dimgets(out, R_NilValue); -> Doesn't work ! // Rf_setAttrib(out, R_NamesSymbol, colnames(x)); Rf_setAttrib(out, R_NamesSymbol, colnames(x)); } else { colnames(out) = colnames(x); } return out; } template <> SEXP varyingmCppImpl(Matrix x, int ng, IntegerVector g, bool any_group, bool drop) { stop("Not supported SEXP type!"); } template <> SEXP varyingmCppImpl(Matrix x, int ng, IntegerVector g, bool any_group, bool drop) { stop("Not supported SEXP type!"); } template <> SEXP varyingmCppImpl(Matrix x, int ng, IntegerVector g, bool any_group, bool drop) { stop("Not supported SEXP type!"); } template <> SEXP varyingmCppImpl(Matrix x, int ng, IntegerVector g, bool any_group, bool drop) { stop("Not supported SEXP type!"); } // [[Rcpp::export]] SEXP varyingmCpp(const SEXP& x, int ng = 0, const IntegerVector& g = 0, bool any_group = true, bool drop = true){ RCPP_RETURN_MATRIX(varyingmCppImpl, x, ng, g, any_group, drop); } // [[Rcpp::export]] SEXP varyinglCpp(const List& x, int ng = 0, const IntegerVector& g = 0, bool any_group = true, bool drop = true) { int l = x.size(); List out(l); for(int j = l; j--; ) { switch(TYPEOF(x[j])) { case REALSXP: out[j] = varyingCppImpl(x[j], ng, g, any_group); break; case INTSXP: out[j] = varyingCppImpl(x[j], ng, g, any_group); break; case STRSXP: out[j] = varyingCppImpl(x[j], ng, g, any_group); break; case LGLSXP: out[j] = varyingCppImpl(x[j], ng, g, any_group); break; default: stop("Not supported SEXP type !"); } } if(drop && any_group) { LogicalVector outl = no_init_vector(l); for(int i = l; i--; ) outl[i] = out[i]; Rf_setAttrib(outl, R_NamesSymbol, Rf_getAttrib(x, R_NamesSymbol)); return outl; } else { SHALLOW_DUPLICATE_ATTRIB(out, x); if(ng == 0 || any_group) Rf_setAttrib(out, R_RowNamesSymbol, Rf_ScalarInteger(1)); else Rf_setAttrib(out, R_RowNamesSymbol, IntegerVector::create(NA_INTEGER, -ng)); return out; } } collapse/src/fcumsum.c0000644000176200001440000002474414174223734014510 0ustar liggesusers#include "collapse_c.h" void fcumsum_double_impl(double *pout, double *px, int ng, int *pg, int narm, int fill, int l) { if(ng == 0) { if(narm <= 0) { pout[0] = px[0]; for(int i = 1; i != l; ++i) pout[i] = pout[i-1] + px[i]; } else if(fill) { pout[0] = ISNAN(px[0]) ? 0.0 : px[0]; for(int i = 1; i != l; ++i) pout[i] = pout[i-1] + (ISNAN(px[i]) ? 0.0 : px[i]); } else { double last = 0; for(int i = 0; i != l; ++i) { if(ISNAN(px[i])) pout[i] = px[i]; else pout[i] = last += px[i]; } } } else { double last[ng+1]; // Also pass pointer to function ?? memset(last, 0.0, sizeof(double) * (ng+1)); if(narm <= 0) { for(int i = 0; i != l; ++i) last[pg[i]] = pout[i] = last[pg[i]] + px[i]; } else if(fill) { for(int i = 0; i != l; ++i) last[pg[i]] = pout[i] = last[pg[i]] + (ISNAN(px[i]) ? 0.0 : px[i]); } else { for(int i = 0; i != l; ++i) { if(ISNAN(px[i])) pout[i] = px[i]; else last[pg[i]] = pout[i] = last[pg[i]] + px[i]; } } } } void fcumsum_double_impl_order(double *pout, double *px, int ng, int *pg, int *po, int narm, int fill, int l) { if(ng == 0) { if(narm <= 0) { --pout; --px; pout[po[0]] = px[po[0]]; for(int i = 1; i != l; ++i) pout[po[i]] = pout[po[i-1]] + px[po[i]]; } else if(fill) { --pout; --px; pout[po[0]] = ISNAN(px[po[0]]) ? 0.0 : px[po[0]]; for(int i = 1; i != l; ++i) pout[po[i]] = pout[po[i-1]] + (ISNAN(px[po[i]]) ? 0.0 : px[po[i]]); } else { double last = 0; for(int i = 0, poi; i != l; ++i) { poi = po[i]-1; if(ISNAN(px[poi])) pout[poi] = px[poi]; else pout[poi] = last += px[poi]; } } } else { double last[ng+1]; // Also pass pointer to function ?? memset(last, 0.0, sizeof(double) * (ng+1)); if(narm <= 0) { for(int i = 0, poi; i != l; ++i) { poi = po[i]-1; last[pg[poi]] = pout[poi] = last[pg[poi]] + px[poi]; } } else if(fill) { for(int i = 0, poi; i != l; ++i) { poi = po[i]-1; last[pg[poi]] = pout[poi] = last[pg[poi]] + (ISNAN(px[poi]) ? 0.0 : px[poi]); } } else { for(int i = 0, poi; i != l; ++i) { poi = po[i]-1; if(ISNAN(px[poi])) pout[poi] = px[poi]; else last[pg[poi]] = pout[poi] = last[pg[poi]] + px[poi]; } } } } void fcumsum_int_impl(int *pout, int *px, int ng, int *pg, int narm, int fill, int l) { long long ckof; if(ng == 0) { if(narm <= 0) { int i = 1; ckof = pout[0] = px[0]; if(ckof == NA_INTEGER) { --i; ckof = 0; } for( ; i != l; ++i) { if(px[i] == NA_INTEGER) { for( ; i != l; ++i) pout[i] = NA_INTEGER; break; } pout[i] = ckof += px[i]; } } else if(fill) { ckof = pout[0] = (px[0] == NA_INTEGER) ? 0 : px[0]; for(int i = 1; i != l; ++i) { if(px[i] != NA_INTEGER) ckof += (long long)px[i]; pout[i] = (int)ckof; } } else { ckof = 0; for(int i = 0; i != l; ++i) { if(px[i] == NA_INTEGER) pout[i] = NA_INTEGER; else pout[i] = ckof += px[i]; } } if(ckof > INT_MAX || ckof <= INT_MIN) error("Integer overflow. Integers in R are bounded between 2,147,483,647 and -2,147,483,647. Use fcumsum(as.numeric(x))."); } else { int last[ng+1]; // Also pass pointer to function ?? memset(last, 0, sizeof(int) * (ng+1)); if(narm <= 0) { for(int i = 0, lsi; i != l; ++i) { if(px[i] == NA_INTEGER) { pout[i] = last[pg[i]] = NA_INTEGER; continue; } lsi = last[pg[i]]; if(lsi == NA_INTEGER) pout[i] = NA_INTEGER; else { ckof = (long long)lsi + px[i]; if(ckof > INT_MAX || ckof <= INT_MIN) error("Integer overflow in one or more groups. Integers in R are bounded between 2,147,483,647 and -2,147,483,647. The sum within each group should be in that range."); last[pg[i]] = pout[i] = (int)ckof; } } } else if(fill) { for(int i = 0; i != l; ++i) { if(px[i] == NA_INTEGER) pout[i] = last[pg[i]]; else { ckof = (long long)last[pg[i]] + px[i]; if(ckof > INT_MAX || ckof <= INT_MIN) error("Integer overflow in one or more groups. Integers in R are bounded between 2,147,483,647 and -2,147,483,647. The sum within each group should be in that range."); last[pg[i]] = pout[i] = (int)ckof; } } } else { for(int i = 0; i != l; ++i) { if(px[i] == NA_INTEGER) pout[i] = NA_INTEGER; else { ckof = (long long)last[pg[i]] + px[i]; if(ckof > INT_MAX || ckof <= INT_MIN) error("Integer overflow in one or more groups. Integers in R are bounded between 2,147,483,647 and -2,147,483,647. The sum within each group should be in that range."); last[pg[i]] = pout[i] = (int)ckof; } } } } } void fcumsum_int_impl_order(int *pout, int *px, int ng, int *pg, int *po, int narm, int fill, int l) { long long ckof; if(ng == 0) { if(narm <= 0) { int i = 1, poi; ckof = pout[po[0]-1] = px[po[0]-1]; if(ckof == NA_INTEGER) { --i; ckof = 0; } for( ; i != l; ++i) { poi = po[i]-1; if(px[poi] == NA_INTEGER) { for( ; i != l; ++i) pout[po[i]-1] = NA_INTEGER; break; } pout[poi] = ckof += px[poi]; } } else if(fill) { ckof = pout[po[0]-1] = (px[po[0]-1] == NA_INTEGER) ? 0 : px[po[0]-1]; for(int i = 1, poi; i != l; ++i) { poi = po[i]-1; if(px[poi] != NA_INTEGER) ckof += (long long)px[poi]; pout[poi] = (int)ckof; } } else { ckof = 0; for(int i = 0, poi; i != l; ++i) { poi = po[i]-1; if(px[poi] == NA_INTEGER) pout[poi] = NA_INTEGER; else pout[poi] = ckof += px[poi]; } } if(ckof > INT_MAX || ckof <= INT_MIN) error("Integer overflow. Integers in R are bounded between 2,147,483,647 and -2,147,483,647. Use fcumsum(as.numeric(x))."); } else { int last[ng+1]; // Also pass pointer to function ?? memset(last, 0, sizeof(int) * (ng+1)); if(narm <= 0) { for(int i = 0, poi, lsi; i != l; ++i) { poi = po[i]-1; if(px[poi] == NA_INTEGER) { pout[poi] = last[pg[poi]] = NA_INTEGER; continue; } lsi = last[pg[poi]]; if(lsi == NA_INTEGER) pout[poi] = NA_INTEGER; else { ckof = (long long)lsi + px[poi]; if(ckof > INT_MAX || ckof <= INT_MIN) error("Integer overflow in one or more groups. Integers in R are bounded between 2,147,483,647 and -2,147,483,647. The sum within each group should be in that range."); last[pg[poi]] = pout[poi] = (int)ckof; } } } else if(fill) { for(int i = 0, poi; i != l; ++i) { poi = po[i]-1; if(px[poi] == NA_INTEGER) pout[poi] = last[pg[poi]]; else { ckof = (long long)last[pg[poi]] + px[poi]; if(ckof > INT_MAX || ckof <= INT_MIN) error("Integer overflow in one or more groups. Integers in R are bounded between 2,147,483,647 and -2,147,483,647. The sum within each group should be in that range."); last[pg[poi]] = pout[poi] = (int)ckof; } } } else { for(int i = 0, poi; i != l; ++i) { poi = po[i]-1; if(px[poi] == NA_INTEGER) pout[poi] = NA_INTEGER; else { ckof = (long long)last[pg[poi]] + px[poi]; if(ckof > INT_MAX || ckof <= INT_MIN) error("Integer overflow in one or more groups. Integers in R are bounded between 2,147,483,647 and -2,147,483,647. The sum within each group should be in that range."); last[pg[poi]] = pout[poi] = (int)ckof; } } } } } SEXP fcumsumC(SEXP x, SEXP Rng, SEXP g, SEXP o, SEXP Rnarm, SEXP Rfill) { int l = length(x), tx = TYPEOF(x), ng = asInteger(Rng), narm = asLogical(Rnarm), fill = asLogical(Rfill), *pg = INTEGER(g), ord = length(o) > 1, *po = ord ? INTEGER(o) : pg; if (l < 1) return x; // Prevents seqfault for numeric(0) #101 if(ng > 0 && l != length(g)) error("length(g) must match length(x)"); if(ord && l != length(o)) error("length(o) must match length(x)"); if(tx == LGLSXP) tx = INTSXP; SEXP out = PROTECT(allocVector(tx, l)); switch(tx) { case REALSXP: if(ord) fcumsum_double_impl_order(REAL(out), REAL(x), ng, pg, po, narm, fill, l); else fcumsum_double_impl(REAL(out), REAL(x), ng, pg, narm, fill, l); break; case INTSXP: if(ord) fcumsum_int_impl_order(INTEGER(out), INTEGER(x), ng, pg, po, narm, fill, l); else fcumsum_int_impl(INTEGER(out), INTEGER(x), ng, pg, narm, fill, l); break; default: error("Unsupported SEXP type"); } SHALLOW_DUPLICATE_ATTRIB(out, x); UNPROTECT(1); return out; } SEXP fcumsummC(SEXP x, SEXP Rng, SEXP g, SEXP o, SEXP Rnarm, SEXP Rfill) { SEXP dim = getAttrib(x, R_DimSymbol); if(isNull(dim)) error("x is not a matrix"); int tx = TYPEOF(x), l = INTEGER(dim)[0], col = INTEGER(dim)[1], ng = asInteger(Rng), narm = asLogical(Rnarm), fill = asLogical(Rfill), *pg = INTEGER(g), ord = length(o) > 1, *po = ord ? INTEGER(o) : pg; if (l < 1) return x; // Prevents seqfault for numeric(0) #101 if(ng > 0 && l != length(g)) error("length(g) must match nrow(x)"); if(ord && l != length(o)) error("length(o) must match nrow(x)"); if(tx == LGLSXP) tx = INTSXP; SEXP out = PROTECT(allocVector(tx, l * col)); switch(tx) { case REALSXP: { double *px = REAL(x), *pout = REAL(out); if(ord) for(int j = 0; j != col; ++j) fcumsum_double_impl_order(pout + j*l, px + j*l, ng, pg, po, narm, fill, l); else for(int j = 0; j != col; ++j) fcumsum_double_impl(pout + j*l, px + j*l, ng, pg, narm, fill, l); break; } case INTSXP: { int *px = INTEGER(x), *pout = INTEGER(out); if(ord) for(int j = 0; j != col; ++j) fcumsum_int_impl_order(pout + j*l, px + j*l, ng, pg, po, narm, fill, l); else for(int j = 0; j != col; ++j) fcumsum_int_impl(pout + j*l, px + j*l, ng, pg, narm, fill, l); break; } default: error("Unsupported SEXP type"); } SHALLOW_DUPLICATE_ATTRIB(out, x); UNPROTECT(1); return out; } SEXP fcumsumlC(SEXP x, SEXP Rng, SEXP g, SEXP o, SEXP Rnarm, SEXP Rfill) { int l = length(x); if(l < 1) return x; // Prevents seqfault for numeric(0) #101 SEXP out = PROTECT(allocVector(VECSXP, l)), *pout = SEXPPTR(out), *px = SEXPPTR(x); for(int j = 0; j != l; ++j) pout[j] = fcumsumC(px[j], Rng, g, o, Rnarm, Rfill); SHALLOW_DUPLICATE_ATTRIB(out, x); UNPROTECT(1); return out; } collapse/src/fbetween_fwithin.cpp0000644000176200001440000011642314174223734016714 0ustar liggesusers#include using namespace Rcpp; // NOTE: Special case is set_mean = -Inf, which is when on the R side mean = "overall.mean" // TODO: Best simply adding set_mean to the mean calculation, or better other solution ? // [[Rcpp::export]] NumericVector BWCpp(const NumericVector& x, int ng = 0, const IntegerVector& g = 0, const SEXP& gs = R_NilValue, const SEXP& w = R_NilValue, bool narm = true, double theta = 1, double set_mean = 0, bool B = false, bool fill = false) { int l = x.size(); if(l < 1) return x; // Prevents segfault for numeric(0) #101 NumericVector out = no_init_vector(l); if (Rf_isNull(w)) { // No weights if (ng == 0) { if(!B && set_mean == R_NegInf) stop("For centering on the overall mean a grouping vector needs to be supplied"); if(narm) { int j = l-1, n = 1; // 1 because for-loop starts from 2 double sum = x[j]; while(std::isnan(sum) && j!=0) sum = x[--j]; if(j != 0) for(int i = j; i--; ) { if(std::isnan(x[i])) continue; sum += x[i]; // Fastest ? ++n; } sum = theta * sum/n - set_mean; // best ? if(B) { if(fill) std::fill(out.begin(), out.end(), sum); // (double)sum // fastest ? -> yes ! else { for(int i = 0; i != l; ++i) { if(std::isnan(x[i])) out[i] = x[i]; else out[i] = sum; // double conversion -> nope, slower } } } else { out = x - sum; // conversion to double not necessary } } else { double sum = 0; for(int i = 0; i != l; ++i) { if(std::isnan(x[i])) { sum = x[i]; break; } else { sum += x[i]; } } sum = theta * sum/l - set_mean; // best ? if(B) { std::fill(out.begin(), out.end(), sum); // (double)sum) // fastest ? } else { out = x - sum; // conversion to double not necessary } } } else { // with groups if(g.size() != l) stop("length(g) must match nrow(X)"); if(narm) { NumericVector sum(ng, NA_REAL); // Other way ? IntegerVector n(ng, 1); // could also do no_init_vector and then add n[g[i]-1] = 1 in fir if condition... -> Nope, that is slower for(int i = l; i--; ) { if(!std::isnan(x[i])) { // faster way to code this ? -> Not Bad at all -> index for g[i]-1? -> Nope, no noticeable improvement if(std::isnan(sum[g[i]-1])) sum[g[i]-1] = x[i]; else { sum[g[i]-1] += x[i]; ++n[g[i]-1]; } } } if(B) { for(int i = ng; i--; ) sum[i] /= n[i]; if(fill) { for(int i = 0; i != l; ++i) out[i] = sum[g[i]-1]; } else { for(int i = 0; i != l; ++i) { if(std::isnan(x[i])) out[i] = x[i]; else out[i] = sum[g[i]-1]; } } } else { if(set_mean != R_NegInf) { if(set_mean == 0 && theta == 1) { for(int i = ng; i--; ) sum[i] /= n[i]; // faster using two loops? or combine ? -> two loops (this solution) is a lot faster ! } else { for(int i = ng; i--; ) sum[i] = theta / n[i] * sum[i] - set_mean; } for(int i = 0; i != l; ++i) out[i] = x[i] - sum[g[i]-1]; // best loop ? -> just as fast as the other one ! } else { int on = 0; double osum = 0; for(int i = ng; i--; ) { // Problem: if one sum remained NA, osum becomes NA if(std::isnan(sum[i])) continue; // solves the issue ! osum += sum[i]; on += n[i]; sum[i] /= n[i]; // fastest ? } osum = osum/on; if(theta != 1) { sum = theta * sum; osum = theta * osum; } for(int i = 0; i != l; ++i) out[i] = x[i] - sum[g[i]-1] + osum; } } } else { NumericVector sum(ng); // // good? -> yes, but not initializing is numerically unstable.. // better for valgrind IntegerVector gsv = (Rf_isNull(gs)) ? IntegerVector(ng) : as(gs); // no_init_vector(ng); int ngs = 0; if(Rf_isNull(gs)) { // gsv = IntegerVector(ng); // std::fill(gsv.begin(), gsv.end(), 0); for(int i = 0; i != l; ++i) { if(std::isnan(x[i])) { if(!std::isnan(sum[g[i]-1])) { sum[g[i]-1] = x[i]; ++ngs; if(ngs == ng) break; } } else { sum[g[i]-1] += x[i]; ++gsv[g[i]-1]; } } } else { // gsv = gs; if(gsv.size() != ng) stop("Vector of group-sizes must match number of groups"); for(int i = 0; i != l; ++i) { if(std::isnan(x[i])) { if(!std::isnan(sum[g[i]-1])) { sum[g[i]-1] = x[i]; ++ngs; if(ngs == ng) break; } } else { sum[g[i]-1] += x[i]; } } } if(B) { for(int i = ng; i--; ) sum[i] /= gsv[i]; for(int i = 0; i != l; ++i) out[i] = sum[g[i]-1]; } else { if(set_mean != R_NegInf) { if(set_mean == 0 && theta == 1) { for(int i = ng; i--; ) sum[i] /= gsv[i]; } else { for(int i = ng; i--; ) sum[i] = theta / gsv[i] * sum[i] - set_mean; } for(int i = 0; i != l; ++i) out[i] = x[i] - sum[g[i]-1]; } else { int on = 0; double osum = 0; for(int i = ng; i--; ) { // Problem: if one sum remained NA, osum becomes NA if(std::isnan(sum[i])) continue; // solves the issue ! osum += sum[i]; on += gsv[i]; sum[i] /= gsv[i]; // fastest ? } osum = osum/on; if(theta != 1) { sum = theta * sum; osum = theta * osum; } for(int i = 0; i != l; ++i) out[i] = x[i] - sum[g[i]-1] + osum; } } } } } else { // With weights NumericVector wg = w; // wg(w) Identical speed if(l != wg.size()) stop("length(w) must match length(x)"); if (ng == 0) { if(!B && set_mean == R_NegInf) stop("For centering on the overall mean a grouping vector needs to be supplied"); if(narm) { int j = l-1; // 1 because for-loop starts from 2 while((std::isnan(x[j]) || std::isnan(wg[j])) && j!=0) --j; // This does not make a difference in performance but is more parsimonious. double sum = x[j]*wg[j], sumw = wg[j]; if(j != 0) for(int i = j; i--; ) { if(std::isnan(x[i]) || std::isnan(wg[i])) continue; sum += x[i]*wg[i]; // Fastest ? sumw += wg[i]; } sum = theta * sum/sumw - set_mean; // best ? if(B) { if(fill) std::fill(out.begin(), out.end(), sum); // (double)sum // fastest ? else { for(int i = 0; i != l; ++i) { if(std::isnan(x[i])) out[i] = x[i]; else out[i] = sum; // double conversion ? } } } else { out = x - sum; // conversion to double not necessary } } else { double sum = 0, sumw = 0; for(int i = 0; i != l; ++i) { if(std::isnan(x[i]) || std::isnan(wg[i])) { // good, check both ? -> yes sum = x[i]+wg[i]; break; } else { sum += x[i]*wg[i]; sumw += wg[i]; } } sum = theta * sum/sumw - set_mean; // best ? if(B) { std::fill(out.begin(), out.end(), sum); // (double)sum// fastes ? } else { out = x - sum; // conversion to double not necessary } } } else { // with groups if(g.size() != l) stop("length(g) must match nrow(X)"); if(narm) { NumericVector sum(ng, NA_REAL), sumw(ng); // Other way ? -> Nope, this is as good as it gets // better for valgrind // NumericVector sumw = no_init_vector(ng); // what if only NA ? -> Works for some reason no problem, and faster for(int i = l; i--; ) { if(std::isnan(x[i]) || std::isnan(wg[i])) continue; // faster way to code this ? -> Not Bad at all -> index for g[i]-1? -> Nope, no noticeable improvement if(std::isnan(sum[g[i]-1])) { sum[g[i]-1] = x[i]*wg[i]; sumw[g[i]-1] = wg[i]; } else { sum[g[i]-1] += x[i]*wg[i]; sumw[g[i]-1] += wg[i]; } } if(B) { sum = sum/sumw; if(fill) { for(int i = 0; i != l; ++i) out[i] = sum[g[i]-1]; } else { for(int i = 0; i != l; ++i) { if(std::isnan(x[i])) out[i] = x[i]; else out[i] = sum[g[i]-1]; } } } else { if(set_mean != R_NegInf) { if(set_mean == 0 && theta == 1) { sum = sum/sumw; } else { sum = theta * sum/sumw - set_mean; } for(int i = 0; i != l; ++i) out[i] = x[i] - sum[g[i]-1]; } else { double osum = 0, osumw = 0; for(int i = ng; i--; ) { // Problem: if one sum remained NA, osum becomes NA if(std::isnan(sum[i])) continue; // solves the issue ! osum += sum[i]; osumw += sumw[i]; sum[i] /= sumw[i]; // fastest ? } osum = osum/osumw; if(theta != 1) { sum = theta * sum; osum = theta * osum; } for(int i = 0; i != l; ++i) out[i] = x[i] - sum[g[i]-1] + osum; } } } else { NumericVector sum(ng), sumw(ng); // good? -> yes // = no_init_vector// Not initializing numerically unstable ! int ngs = 0; for(int i = 0; i != l; ++i) { if(std::isnan(x[i]) || std::isnan(wg[i])) { if(!std::isnan(sum[g[i]-1])) { sum[g[i]-1] = sumw[g[i]-1] = x[i]+wg[i]; // or NA_REAL ? -> Nope, good ! ++ngs; if(ngs == ng) break; } } else { sum[g[i]-1] += x[i]*wg[i]; sumw[g[i]-1] += wg[i]; } } if(B) { sum = sum/sumw; for(int i = 0; i != l; ++i) out[i] = sum[g[i]-1]; } else { if(set_mean != R_NegInf) { if(set_mean == 0 && theta == 1) { sum = sum/sumw; } else { sum = theta * sum/sumw - set_mean; } for(int i = 0; i != l; ++i) out[i] = x[i] - sum[g[i]-1]; } else { double osum = 0, osumw = 0; for(int i = ng; i--; ) { // Problem: if one sum remained NA, osum becomes NA if(std::isnan(sum[i])) continue; // solves the issue ! osum += sum[i]; osumw += sumw[i]; sum[i] /= sumw[i]; // fastest ? } osum = osum/osumw; if(theta != 1) { sum = theta * sum; osum = theta * osum; } for(int i = 0; i != l; ++i) out[i] = x[i] - sum[g[i]-1] + osum; } } } } } SHALLOW_DUPLICATE_ATTRIB(out, x); return out; } // [[Rcpp::export]] NumericMatrix BWmCpp(const NumericMatrix& x, int ng = 0, const IntegerVector& g = 0, const SEXP& gs = R_NilValue, const SEXP& w = R_NilValue, bool narm = true, double theta = 1, double set_mean = 0, bool B = false, bool fill = false) { int l = x.nrow(), col = x.ncol(); NumericMatrix out = no_init_matrix(l, col); if (Rf_isNull(w)) { // No weights ! if(ng == 0) { if(!B && set_mean == R_NegInf) stop("For centering on the overall mean a grouping vector needs to be supplied"); if(narm) { for(int j = col; j--; ) { // Instead Am(j,_) you can use Am.row(j). NumericMatrix::ConstColumn column = x( _ , j); NumericMatrix::Column outj = out( _ , j); int k = l-1, nj = 1; double sumj = column[k]; while(std::isnan(sumj) && k!=0) sumj = column[--k]; if(k != 0) for(int i = k; i--; ) { if(std::isnan(column[i])) continue; sumj += column[i]; ++nj; } sumj = theta * sumj/nj - set_mean; // best ? if(B) { if(fill) std::fill(outj.begin(), outj.end(), sumj); // (double)sumj else { for(int i = 0; i != l; ++i) { if(std::isnan(column[i])) outj[i] = column[i]; else outj[i] = sumj; } } } else { outj = column - sumj; } } } else { for(int j = col; j--; ) { NumericMatrix::ConstColumn column = x( _ , j); NumericMatrix::Column outj = out( _ , j); double sumj = 0; for(int i = 0; i != l; ++i) { if(std::isnan(column[i])) { sumj = column[i]; break; } else { sumj += column[i]; } } sumj = theta * sumj/l - set_mean; // best ? if(B) { std::fill(outj.begin(), outj.end(), sumj); // (double)sumj } else { outj = column - sumj; } } } } else { // with groups if(g.size() != l) stop("length(g) must match nrow(X)"); if(narm) { for(int j = col; j--; ) { NumericMatrix::ConstColumn column = x( _ , j); NumericMatrix::Column outj = out( _ , j); NumericVector sumj(ng, NA_REAL); // std::vector // faster than NumericVector ? std::vector nj(ng); // int nj[ng]; // use vector also ? for(int i = l; i--; ) { if(!std::isnan(column[i])) { if(std::isnan(sumj[g[i]-1])) { sumj[g[i]-1] = column[i]; nj[g[i]-1] = 1; } else { sumj[g[i]-1] += column[i]; ++nj[g[i]-1]; } } } if(B) { for(int i = ng; i--; ) sumj[i] /= nj[i]; if(fill) { for(int i = 0; i != l; ++i) outj[i] = sumj[g[i]-1]; } else { for(int i = 0; i != l; ++i) { if(std::isnan(column[i])) outj[i] = column[i]; else outj[i] = sumj[g[i]-1]; } } } else { if(set_mean != R_NegInf) { if(set_mean == 0 && theta == 1) { for(int i = ng; i--; ) sumj[i] /= nj[i]; } else { for(int i = ng; i--; ) sumj[i] = theta / nj[i] * sumj[i] - set_mean; } for(int i = 0; i != l; ++i) outj[i] = column[i] - sumj[g[i]-1]; } else { int on = 0; double osum = 0; for(int i = ng; i--; ) { // Problem: if one sum remained NA, osum becomes NA if(std::isnan(sumj[i])) continue; // solves the issue ! osum += sumj[i]; on += nj[i]; sumj[i] /= nj[i]; } osum = osum/on; if(theta != 1) { sumj = theta * sumj; osum = theta * osum; } for(int i = 0; i != l; ++i) outj[i] = column[i] - sumj[g[i]-1] + osum; } } } } else { if(Rf_isNull(gs)) { // int gsv[ng], memsize = sizeof(int)*ng; for(int j = col; j--; ) { NumericMatrix::ConstColumn column = x( _ , j); NumericMatrix::Column outj = out( _ , j); NumericVector sumj(ng); // std::vector // better than array or NumericVector ? std::vector gsv(ng); // memset(gsv, 0, memsize); int ngs = 0; for(int i = 0; i != l; ++i) { if(std::isnan(column[i])) { if(!std::isnan(sumj[g[i]-1])) { sumj[g[i]-1] = column[i]; ++ngs; if(ngs == ng) break; } } else { sumj[g[i]-1] += column[i]; ++gsv[g[i]-1]; } } if(B) { for(int i = ng; i--; ) sumj[i] /= gsv[i]; for(int i = 0; i != l; ++i) outj[i] = sumj[g[i]-1]; } else { if(set_mean != R_NegInf) { if(set_mean == 0 && theta == 1) { for(int i = ng; i--; ) sumj[i] /= gsv[i]; } else { for(int i = ng; i--; ) sumj[i] = theta / gsv[i] * sumj[i] - set_mean; } for(int i = 0; i != l; ++i) outj[i] = column[i] - sumj[g[i]-1]; } else { int on = 0; double osum = 0; for(int i = ng; i--; ) { // Problem: if one sum remained NA, osum becomes NA if(std::isnan(sumj[i])) continue; // solves the issue ! osum += sumj[i]; on += gsv[i]; sumj[i] /= gsv[i]; } osum = osum/on; if(theta != 1) { sumj = theta * sumj; osum = theta * osum; } for(int i = 0; i != l; ++i) outj[i] = column[i] - sumj[g[i]-1] + osum; } } } } else { IntegerVector gsv = gs; if(gsv.size() != ng) stop("Vector of group-sizes must match number of groups"); for(int j = col; j--; ) { NumericMatrix::ConstColumn column = x( _ , j); NumericMatrix::Column outj = out( _ , j); NumericVector sumj(ng); // std::vector int ngs = 0; for(int i = 0; i != l; ++i) { if(std::isnan(column[i])) { if(!std::isnan(sumj[g[i]-1])) { sumj[g[i]-1] = column[i]; ++ngs; if(ngs == ng) break; } } else { sumj[g[i]-1] += column[i]; } } if(B) { for(int i = ng; i--; ) sumj[i] /= gsv[i]; for(int i = 0; i != l; ++i) outj[i] = sumj[g[i]-1]; } else { if(set_mean != R_NegInf) { if(set_mean == 0 && theta == 1) { for(int i = ng; i--; ) sumj[i] /= gsv[i]; } else { for(int i = ng; i--; ) sumj[i] = theta / gsv[i] * sumj[i] - set_mean; } for(int i = 0; i != l; ++i) outj[i] = column[i] - sumj[g[i]-1]; } else { int on = 0; double osum = 0; for(int i = ng; i--; ) { // Problem: if one sum remained NA, osum becomes NA if(std::isnan(sumj[i])) continue; // solves the issue ! osum += sumj[i]; on += gsv[i]; sumj[i] /= gsv[i]; } osum = osum/on; if(theta != 1) { sumj = theta * sumj; osum = theta * osum; } for(int i = 0; i != l; ++i) outj[i] = column[i] - sumj[g[i]-1] + osum; } } } } } } } else { // With weights NumericVector wg = w; if(l != wg.size()) stop("length(w) must match nrow(X)"); if(ng == 0) { if(!B && set_mean == R_NegInf) stop("For centering on the overall mean a grouping vector needs to be supplied"); if(narm) { for(int j = col; j--; ) { // Instead Am(j,_) you can use Am.row(j). NumericMatrix::ConstColumn column = x( _ , j); NumericMatrix::Column outj = out( _ , j); int k = l-1; while((std::isnan(column[k]) || std::isnan(wg[k])) && k!=0) --k; double sumj = column[k]*wg[k], sumwj = wg[k]; if(k != 0) for(int i = k; i--; ) { if(std::isnan(column[i]) || std::isnan(wg[i])) continue; sumj += column[i]*wg[i]; sumwj += wg[i]; } sumj = theta * sumj/sumwj - set_mean; // best ? if(B) { if(fill) std::fill(outj.begin(), outj.end(), sumj); // (double)sumj else { for(int i = 0; i != l; ++i) { if(std::isnan(column[i])) outj[i] = column[i]; else outj[i] = sumj; } } } else { outj = column - sumj; } } } else { for(int j = col; j--; ) { NumericMatrix::ConstColumn column = x( _ , j); NumericMatrix::Column outj = out( _ , j); double sumj = 0, sumwj = 0; for(int i = 0; i != l; ++i) { if(std::isnan(column[i]) || std::isnan(wg[i])) { sumj = column[i]+wg[i]; break; } else { sumj += column[i]*wg[i]; sumwj += wg[i]; } } sumj = theta * sumj/sumwj - set_mean; // best ? if(B) { std::fill(outj.begin(), outj.end(), sumj); // (double)sumj } else { outj = column - sumj; } } } } else { // with groups if(g.size() != l) stop("length(g) must match nrow(X)"); if(narm) { for(int j = col; j--; ) { NumericMatrix::ConstColumn column = x( _ , j); NumericMatrix::Column outj = out( _ , j); NumericVector sumj(ng, NA_REAL), sumwj(ng); // best ? // std::vector for(int i = l; i--; ) { if(std::isnan(column[i]) || std::isnan(wg[i])) continue; if(std::isnan(sumj[g[i]-1])) { sumj[g[i]-1] = column[i]*wg[i]; sumwj[g[i]-1] = wg[i]; } else { sumj[g[i]-1] += column[i]*wg[i]; sumwj[g[i]-1] += wg[i]; } } if(B) { sumj = sumj/sumwj; if(fill) { for(int i = 0; i != l; ++i) outj[i] = sumj[g[i]-1]; } else { for(int i = 0; i != l; ++i) { if(std::isnan(column[i])) outj[i] = column[i]; else outj[i] = sumj[g[i]-1]; } } } else { if(set_mean != R_NegInf) { if(set_mean == 0 && theta == 1) { sumj = sumj/sumwj; } else { sumj = theta * sumj/sumwj - set_mean; } for(int i = 0; i != l; ++i) outj[i] = column[i] - sumj[g[i]-1]; } else { double osum = 0, osumw = 0; for(int i = ng; i--; ) { // Problem: if one sum remained NA, osum becomes NA if(std::isnan(sumj[i])) continue; // solves the issue ! osum += sumj[i]; osumw += sumwj[i]; sumj[i] /= sumwj[i]; } osum = osum/osumw; if(theta != 1) { sumj = theta * sumj; osum = theta * osum; } for(int i = 0; i != l; ++i) outj[i] = column[i] - sumj[g[i]-1] + osum; } } } } else { for(int j = col; j--; ) { NumericMatrix::ConstColumn column = x( _ , j); NumericMatrix::Column outj = out( _ , j); NumericVector sumj(ng), sumwj(ng); // std::vector int ngs = 0; for(int i = 0; i != l; ++i) { if(std::isnan(column[i]) || std::isnan(wg[i])) { if(!std::isnan(sumj[g[i]-1])) { sumj[g[i]-1] = sumwj[g[i]-1] = column[i]+wg[i]; // or NA_REAL ? -> Nope, good ++ngs; if(ngs == ng) break; } } else { sumj[g[i]-1] += column[i]*wg[i]; sumwj[g[i]-1] += wg[i]; } } if(B) { sumj = sumj/sumwj; for(int i = 0; i != l; ++i) outj[i] = sumj[g[i]-1]; } else { if(set_mean != R_NegInf) { if(set_mean == 0 && theta == 1) { sumj = sumj/sumwj; } else { sumj = theta * sumj/sumwj - set_mean; } for(int i = 0; i != l; ++i) outj[i] = column[i] - sumj[g[i]-1]; } else { double osum = 0, osumw = 0; for(int i = ng; i--; ) { // Problem: if one sum remained NA, osum becomes NA if(std::isnan(sumj[i])) continue; // solves the issue ! osum += sumj[i]; osumw += sumwj[i]; sumj[i] /= sumwj[i]; } osum = osum/osumw; if(theta != 1) { sumj = theta * sumj; osum = theta * osum; } for(int i = 0; i != l; ++i) outj[i] = column[i] - sumj[g[i]-1] + osum; } } } } } } SHALLOW_DUPLICATE_ATTRIB(out, x); return out; } // [[Rcpp::export]] List BWlCpp(const List& x, int ng = 0, const IntegerVector& g = 0, const SEXP& gs = R_NilValue, const SEXP& w = R_NilValue, bool narm = true, double theta = 1, double set_mean = 0, bool B = false, bool fill = false) { int l = x.size(); List out(l); if (Rf_isNull(w)) { // No weights if (ng == 0) { if(!B && set_mean == R_NegInf) stop("For centering on the overall mean a grouping vector needs to be supplied"); if(narm) { for(int j = l; j--; ) { NumericVector column = x[j]; int row = column.size(); int k = row-1, nj = 1; double sumj = column[k]; while(std::isnan(sumj) && k!=0) sumj = column[--k]; if(k != 0) for(int i = k; i--; ) { if(std::isnan(column[i])) continue; sumj += column[i]; ++nj; } sumj = theta * sumj/nj - set_mean; // best ? if(B) { if(fill) out[j] = rep(sumj, row); // rep((double)sumj, row); // good ? else { NumericVector outj = no_init_vector(row); for(int i = 0; i != row; ++i) { if(std::isnan(column[i])) outj[i] = column[i]; else outj[i] = sumj; } out[j] = outj; } } else { out[j] = column - sumj; } SHALLOW_DUPLICATE_ATTRIB(out[j], column); // good ? } } else { for(int j = l; j--; ) { NumericVector column = x[j]; double sumj = 0; int row = column.size(); for(int i = 0; i != row; ++i) { if(std::isnan(column[i])) { sumj = column[i]; break; } else { sumj += column[i]; } } sumj = theta * sumj/row - set_mean; // best ? if(B) { out[j] = rep(sumj, row); // rep((double)sumj, row); } else { out[j] = column - sumj; } SHALLOW_DUPLICATE_ATTRIB(out[j], column); } } } else { // With groups int gss = g.size(); if(narm) { for(int j = l; j--; ) { NumericVector column = x[j]; int row = column.size(); if(gss != row) stop("length(g) must match nrow(X)"); NumericVector sumj(ng, NA_REAL); // std::vector std::vector nj(ng, 1); for(int i = row; i--; ) { if(!std::isnan(column[i])) { if(std::isnan(sumj[g[i]-1])) sumj[g[i]-1] = column[i]; else { sumj[g[i]-1] += column[i]; ++nj[g[i]-1]; } } } NumericVector outj = no_init_vector(row); if(B) { for(int i = ng; i--; ) sumj[i] /= nj[i]; if(fill) { for(int i = 0; i != row; ++i) outj[i] = sumj[g[i]-1]; } else { for(int i = 0; i != row; ++i) { if(std::isnan(column[i])) outj[i] = column[i]; else outj[i] = sumj[g[i]-1]; } } } else { if(set_mean != R_NegInf) { if(set_mean == 0 && theta == 1) { for(int i = ng; i--; ) sumj[i] /= nj[i]; } else { for(int i = ng; i--; ) sumj[i] = theta / nj[i] * sumj[i] - set_mean; } for(int i = 0; i != row; ++i) outj[i] = column[i] - sumj[g[i]-1]; } else { int on = 0; double osum = 0; for(int i = ng; i--; ) { // Problem: if one sum remained NA, osum becomes NA if(std::isnan(sumj[i])) continue; // solves the issue ! osum += sumj[i]; on += nj[i]; sumj[i] /= nj[i]; } osum = osum/on; if(theta != 1) { sumj = theta * sumj; osum = theta * osum; } for(int i = 0; i != row; ++i) outj[i] = column[i] - sumj[g[i]-1] + osum; } } SHALLOW_DUPLICATE_ATTRIB(outj, column); out[j] = outj; } } else { if(Rf_isNull(gs)) { // int gsv[ng], memsize = sizeof(int)*ng; for(int j = l; j--; ) { NumericVector column = x[j]; int row = column.size(); if(gss != row) stop("length(g) must match nrow(X)"); NumericVector sumj(ng); // std::vector // memset(gsv, 0, memsize); std::vector gsv(ng); int ngs = 0; for(int i = 0; i != row; ++i) { if(std::isnan(column[i])) { if(!std::isnan(sumj[g[i]-1])) { sumj[g[i]-1] = column[i]; ++ngs; if(ngs == ng) break; } } else { sumj[g[i]-1] += column[i]; ++gsv[g[i]-1]; } } NumericVector outj = no_init_vector(row); if(B) { for(int i = ng; i--; ) sumj[i] /= gsv[i]; for(int i = 0; i != row; ++i) outj[i] = sumj[g[i]-1]; } else { if(set_mean != R_NegInf) { if(set_mean == 0 && theta == 1) { for(int i = ng; i--; ) sumj[i] /= gsv[i]; } else { for(int i = ng; i--; ) sumj[i] = theta / gsv[i] * sumj[i] - set_mean; } for(int i = 0; i != row; ++i) outj[i] = column[i] - sumj[g[i]-1]; } else { int on = 0; double osum = 0; for(int i = ng; i--; ) { // Problem: if one sum remained NA, osum becomes NA if(std::isnan(sumj[i])) continue; // solves the issue ! osum += sumj[i]; on += gsv[i]; sumj[i] /= gsv[i]; } osum = osum/on; if(theta != 1) { sumj = theta * sumj; osum = theta * osum; } for(int i = 0; i != row; ++i) outj[i] = column[i] - sumj[g[i]-1] + osum; } } SHALLOW_DUPLICATE_ATTRIB(outj, column); out[j] = outj; } } else { IntegerVector gsv = gs; if(gsv.size() != ng) stop("Vector of group-sizes must match number of groups"); for(int j = l; j--; ) { NumericVector column = x[j]; int row = column.size(); if(gss != row) stop("length(g) must match nrow(X)"); NumericVector sumj(ng); // = no_init_vector // Not initializing seems to be numerically unstable ! int ngs = 0; for(int i = 0; i != row; ++i) { if(std::isnan(column[i])) { if(!std::isnan(sumj[g[i]-1])) { sumj[g[i]-1] = column[i]; ++ngs; if(ngs == ng) break; } } else { sumj[g[i]-1] += column[i]; } } NumericVector outj = no_init_vector(row); if(B) { for(int i = ng; i--; ) sumj[i] /= gsv[i]; for(int i = 0; i != row; ++i) outj[i] = sumj[g[i]-1]; } else { if(set_mean != R_NegInf) { if(set_mean == 0 && theta == 1) { for(int i = ng; i--; ) sumj[i] /= gsv[i]; } else { for(int i = ng; i--; ) sumj[i] = theta / gsv[i] * sumj[i] - set_mean; } for(int i = 0; i != row; ++i) outj[i] = column[i] - sumj[g[i]-1]; } else { int on = 0; double osum = 0; for(int i = ng; i--; ) { // Problem: if one sum remained NA, osum becomes NA if(std::isnan(sumj[i])) continue; // solves the issue ! osum += sumj[i]; on += gsv[i]; sumj[i] /= gsv[i]; } osum = osum/on; if(theta != 1) { sumj = theta * sumj; osum = theta * osum; } for(int i = 0; i != row; ++i) outj[i] = column[i] - sumj[g[i]-1] + osum; } } SHALLOW_DUPLICATE_ATTRIB(outj, column); out[j] = outj; } } } } } else { // With weights NumericVector wg = w; int wgs = wg.size(); if (ng == 0) { if(!B && set_mean == R_NegInf) stop("For centering on the overall mean a grouping vector needs to be supplied"); if(narm) { for(int j = l; j--; ) { NumericVector column = x[j]; int row = column.size(); if(row != wgs) stop("length(w) must match nrow(X)"); int k = row-1; while((std::isnan(column[k]) || std::isnan(wg[k])) && k!=0) --k; double sumj = column[k]*wg[k], sumwi = wg[k]; if(k != 0) for(int i = k; i--; ) { if(std::isnan(column[i]) || std::isnan(wg[i])) continue; sumj += column[i]*wg[i]; sumwi += wg[i]; } sumj = theta * sumj/sumwi - set_mean; // best ? if(B) { if(fill) out[j] = rep(sumj, row); // rep((double)sumj, row); else { NumericVector outj = no_init_vector(row); for(int i = 0; i != row; ++i) { if(std::isnan(column[i])) outj[i] = column[i]; else outj[i] = sumj; } out[j] = outj; } } else { out[j] = column - sumj; } SHALLOW_DUPLICATE_ATTRIB(out[j], column); // good like this ? } } else { for(int j = l; j--; ) { NumericVector column = x[j]; int row = column.size(); if(row != wgs) stop("length(w) must match nrow(X)"); double sumj = 0, sumwi = 0; for(int i = 0; i != row; ++i) { if(std::isnan(column[i]) || std::isnan(wg[i])) { sumj = column[i]+wg[i]; break; } else { sumj += column[i]*wg[i]; sumwi += wg[i]; } } sumj = theta * sumj/sumwi - set_mean; // best ? if(B) { out[j] = rep(sumj, row); // rep((double)sumj, row); } else { out[j] = column - sumj; } SHALLOW_DUPLICATE_ATTRIB(out[j], column); } } } else { // With groups int gss = g.size(); if(wgs != gss) stop("length(w) must match length(g)"); if(narm) { for(int j = l; j--; ) { NumericVector column = x[j]; int row = column.size(); if(gss != row) stop("length(g) must match nrow(X)"); NumericVector sumj(ng, NA_REAL), sumwj(ng); // std::vector for(int i = row; i--; ) { if(std::isnan(column[i]) || std::isnan(wg[i])) continue; if(std::isnan(sumj[g[i]-1])) { sumj[g[i]-1] = column[i]*wg[i]; sumwj[g[i]-1] = wg[i]; } else { sumj[g[i]-1] += column[i]*wg[i]; sumwj[g[i]-1] += wg[i]; } } NumericVector outj = no_init_vector(row); if(B) { sumj = sumj/sumwj; if(fill) { for(int i = 0; i != row; ++i) outj[i] = sumj[g[i]-1]; } else { for(int i = 0; i != row; ++i) { if(std::isnan(column[i])) outj[i] = column[i]; else outj[i] = sumj[g[i]-1]; } } } else { if(set_mean != R_NegInf) { if(set_mean == 0 && theta == 1) { sumj = sumj/sumwj; } else { sumj = theta * sumj/sumwj - set_mean; } for(int i = 0; i != row; ++i) outj[i] = column[i] - sumj[g[i]-1]; } else { double osum = 0, osumw = 0; for(int i = ng; i--; ) { // Problem: if one sum remained NA, osum becomes NA if(std::isnan(sumj[i])) continue; // solves the issue ! osum += sumj[i]; osumw += sumwj[i]; sumj[i] /= sumwj[i]; } osum = osum/osumw; if(theta != 1) { sumj = theta * sumj; osum = theta * osum; } for(int i = 0; i != row; ++i) outj[i] = column[i] - sumj[g[i]-1] + osum; } } SHALLOW_DUPLICATE_ATTRIB(outj, column); out[j] = outj; } } else { for(int j = l; j--; ) { NumericVector column = x[j]; int row = column.size(); if(gss != row) stop("length(g) must match nrow(X)"); NumericVector sumj(ng), sumwj(ng); // std::vector int ngs = 0; for(int i = 0; i != row; ++i) { if(std::isnan(column[i]) || std::isnan(wg[i])) { if(!std::isnan(sumj[g[i]-1])) { sumj[g[i]-1] = sumwj[g[i]-1] = column[i]+wg[i]; // or NA_REAL ? -> Nope, good ++ngs; if(ngs == ng) break; } } else { sumj[g[i]-1] += column[i]*wg[i]; sumwj[g[i]-1] += wg[i]; } } NumericVector outj = no_init_vector(row); if(B) { sumj = sumj/sumwj; for(int i = 0; i != row; ++i) outj[i] = sumj[g[i]-1]; } else { if(set_mean != R_NegInf) { if(set_mean == 0 && theta == 1) { sumj = sumj/sumwj; } else { sumj = theta * sumj/sumwj - set_mean; } for(int i = 0; i != row; ++i) outj[i] = column[i] - sumj[g[i]-1]; } else { double osum = 0, osumw = 0; for(int i = ng; i--; ) { // Problem: if one sum remained NA, osum becomes NA if(std::isnan(sumj[i])) continue; // solves the issue ! osum += sumj[i]; osumw += sumwj[i]; sumj[i] /= sumwj[i]; } osum = osum/osumw; if(theta != 1) { sumj = theta * sumj; osum = theta * osum; } for(int i = 0; i != row; ++i) outj[i] = column[i] - sumj[g[i]-1] + osum; } } SHALLOW_DUPLICATE_ATTRIB(outj, column); out[j] = outj; } } } } SHALLOW_DUPLICATE_ATTRIB(out, x); return out; } collapse/src/collapse.h0000644000176200001440000002320714172367040014626 0ustar liggesusers#include using namespace Rcpp; // BWCpp RcppExport SEXP _collapse_BWCpp(SEXP xSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP gsSEXP, SEXP wSEXP, SEXP narmSEXP, SEXP thetaSEXP, SEXP set_meanSEXP, SEXP BSEXP, SEXP fillSEXP); // BWmCpp RcppExport SEXP _collapse_BWmCpp(SEXP xSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP gsSEXP, SEXP wSEXP, SEXP narmSEXP, SEXP thetaSEXP, SEXP set_meanSEXP, SEXP BSEXP, SEXP fillSEXP); // BWlCpp RcppExport SEXP _collapse_BWlCpp(SEXP xSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP gsSEXP, SEXP wSEXP, SEXP narmSEXP, SEXP thetaSEXP, SEXP set_meanSEXP, SEXP BSEXP, SEXP fillSEXP); // TRACpp RcppExport SEXP _collapse_TRACpp(SEXP xSEXP, SEXP xAGSEXP, SEXP gSEXP, SEXP retSEXP); // TRAmCpp RcppExport SEXP _collapse_TRAmCpp(SEXP xSEXP, SEXP xAGSEXP, SEXP gSEXP, SEXP retSEXP); // TRAlCpp RcppExport SEXP _collapse_TRAlCpp(SEXP xSEXP, SEXP xAGSEXP, SEXP gSEXP, SEXP retSEXP); // fndistinctCpp RcppExport SEXP _collapse_fndistinctCpp(SEXP xSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP gsSEXP, SEXP narmSEXP); // fndistinctlCpp RcppExport SEXP _collapse_fndistinctlCpp(SEXP xSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP gsSEXP, SEXP narmSEXP, SEXP dropSEXP); // fndistinctmCpp RcppExport SEXP _collapse_fndistinctmCpp(SEXP xSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP gsSEXP, SEXP narmSEXP, SEXP dropSEXP); // pwnobsmCpp RcppExport SEXP _collapse_pwnobsmCpp(SEXP xSEXP); // fnobsCpp // RcppExport SEXP _collapse_fnobsCpp(SEXP xSEXP, SEXP ngSEXP, SEXP gSEXP); // fnobsmCpp // RcppExport SEXP _collapse_fnobsmCpp(SEXP xSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP dropSEXP); // fnobslCpp // RcppExport SEXP _collapse_fnobslCpp(SEXP xSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP dropSEXP); // varyingCpp RcppExport SEXP _collapse_varyingCpp(SEXP xSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP any_groupSEXP); // varyingmCpp RcppExport SEXP _collapse_varyingmCpp(SEXP xSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP any_groupSEXP, SEXP dropSEXP); // varyinglCpp RcppExport SEXP _collapse_varyinglCpp(SEXP xSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP any_groupSEXP, SEXP dropSEXP); // fbstatsCpp RcppExport SEXP _collapse_fbstatsCpp(SEXP xSEXP, SEXP extSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP npgSEXP, SEXP pgSEXP, SEXP wSEXP, SEXP stable_algoSEXP, SEXP arraySEXP, SEXP setnSEXP, SEXP gnSEXP); // fbstatsmCpp RcppExport SEXP _collapse_fbstatsmCpp(SEXP xSEXP, SEXP extSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP npgSEXP, SEXP pgSEXP, SEXP wSEXP, SEXP stable_algoSEXP, SEXP arraySEXP, SEXP gnSEXP); // fbstatslCpp RcppExport SEXP _collapse_fbstatslCpp(SEXP xSEXP, SEXP extSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP npgSEXP, SEXP pgSEXP, SEXP wSEXP, SEXP stable_algoSEXP, SEXP arraySEXP, SEXP gnSEXP); // ffirstCpp // RcppExport SEXP _collapse_ffirstCpp(SEXP xSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP narmSEXP); // ffirstmCpp // RcppExport SEXP _collapse_ffirstmCpp(SEXP xSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP narmSEXP, SEXP dropSEXP); // ffirstlCpp // RcppExport SEXP _collapse_ffirstlCpp(SEXP xSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP narmSEXP); // fdiffgrowthCpp RcppExport SEXP _collapse_fdiffgrowthCpp(SEXP xSEXP, SEXP nSEXP, SEXP diffSEXP, SEXP fillSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP gsSEXP, SEXP tSEXP, SEXP retSEXP, SEXP rhoSEXP, SEXP namesSEXP, SEXP powerSEXP); // fdiffgrowthmCpp RcppExport SEXP _collapse_fdiffgrowthmCpp(SEXP xSEXP, SEXP nSEXP, SEXP diffSEXP, SEXP fillSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP gsSEXP, SEXP tSEXP, SEXP retSEXP, SEXP rhoSEXP, SEXP namesSEXP, SEXP powerSEXP); // fdiffgrowthlCpp RcppExport SEXP _collapse_fdiffgrowthlCpp(SEXP xSEXP, SEXP nSEXP, SEXP diffSEXP, SEXP fillSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP gsSEXP, SEXP tSEXP, SEXP retSEXP, SEXP rhoSEXP, SEXP namesSEXP, SEXP powerSEXP); // flagleadCpp RcppExport SEXP _collapse_flagleadCpp(SEXP xSEXP, SEXP nSEXP, SEXP fillSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP tSEXP, SEXP namesSEXP); // flagleadmCpp RcppExport SEXP _collapse_flagleadmCpp(SEXP xSEXP, SEXP nSEXP, SEXP fillSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP tSEXP, SEXP namesSEXP); // flagleadlCpp RcppExport SEXP _collapse_flagleadlCpp(SEXP xSEXP, SEXP nSEXP, SEXP fillSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP tSEXP, SEXP namesSEXP); // flastCpp // RcppExport SEXP _collapse_flastCpp(SEXP xSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP narmSEXP); // flastmCpp // RcppExport SEXP _collapse_flastmCpp(SEXP xSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP narmSEXP, SEXP dropSEXP); // flastlCpp // RcppExport SEXP _collapse_flastlCpp(SEXP xSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP narmSEXP); // fminmaxCpp // RcppExport SEXP _collapse_fminmaxCpp(SEXP xSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP narmSEXP, SEXP retSEXP); // fminmaxmCpp // RcppExport SEXP _collapse_fminmaxmCpp(SEXP xSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP narmSEXP, SEXP dropSEXP, SEXP retSEXP); // fminmaxlCpp // RcppExport SEXP _collapse_fminmaxlCpp(SEXP xSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP narmSEXP, SEXP dropSEXP, SEXP retSEXP); // fmeanCpp RcppExport SEXP _collapse_fmeanCpp(SEXP xSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP gsSEXP, SEXP wSEXP, SEXP narmSEXP); // fmeanmCpp RcppExport SEXP _collapse_fmeanmCpp(SEXP xSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP gsSEXP, SEXP wSEXP, SEXP narmSEXP, SEXP dropSEXP); // fmeanlCpp RcppExport SEXP _collapse_fmeanlCpp(SEXP xSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP gsSEXP, SEXP wSEXP, SEXP narmSEXP, SEXP dropSEXP); // fmedianCpp // RcppExport SEXP _collapse_fmedianCpp(SEXP xSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP gsSEXP, SEXP wSEXP, SEXP narmSEXP); // fmedianmCpp // RcppExport SEXP _collapse_fmedianmCpp(SEXP xSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP gsSEXP, SEXP wSEXP, SEXP narmSEXP, SEXP dropSEXP); // fmedianlCpp // RcppExport SEXP _collapse_fmedianlCpp(SEXP xSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP gsSEXP, SEXP wSEXP, SEXP narmSEXP, SEXP dropSEXP); // fnthCpp RcppExport SEXP _collapse_fnthCpp(SEXP xSEXP, SEXP QSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP gsSEXP, SEXP wSEXP, SEXP narmSEXP, SEXP retSEXP); // fnthmCpp RcppExport SEXP _collapse_fnthmCpp(SEXP xSEXP, SEXP QSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP gsSEXP, SEXP wSEXP, SEXP narmSEXP, SEXP dropSEXP, SEXP retSEXP); // fnthlCpp RcppExport SEXP _collapse_fnthlCpp(SEXP xSEXP, SEXP QSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP gsSEXP, SEXP wSEXP, SEXP narmSEXP, SEXP dropSEXP, SEXP retSEXP); // fmodeCpp RcppExport SEXP _collapse_fmodeCpp(SEXP xSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP gsSEXP, SEXP wSEXP, SEXP narmSEXP, SEXP retSEXP); // fmodelCpp RcppExport SEXP _collapse_fmodelCpp(SEXP xSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP gsSEXP, SEXP wSEXP, SEXP narmSEXP, SEXP retSEXP); // fmodemCpp RcppExport SEXP _collapse_fmodemCpp(SEXP xSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP gsSEXP, SEXP wSEXP, SEXP narmSEXP, SEXP dropSEXP, SEXP retSEXP); // fprodCpp RcppExport SEXP _collapse_fprodCpp(SEXP xSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP wSEXP, SEXP narmSEXP); // fprodmCpp RcppExport SEXP _collapse_fprodmCpp(SEXP xSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP wSEXP, SEXP narmSEXP, SEXP dropSEXP); // fprodlCpp RcppExport SEXP _collapse_fprodlCpp(SEXP xSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP wSEXP, SEXP narmSEXP, SEXP dropSEXP); // fscaleCpp RcppExport SEXP _collapse_fscaleCpp(SEXP xSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP wSEXP, SEXP narmSEXP, SEXP set_meanSEXP, SEXP set_sdSEXP); // fscalemCpp RcppExport SEXP _collapse_fscalemCpp(SEXP xSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP wSEXP, SEXP narmSEXP, SEXP set_meanSEXP, SEXP set_sdSEXP); // fscalelCpp RcppExport SEXP _collapse_fscalelCpp(SEXP xSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP wSEXP, SEXP narmSEXP, SEXP set_meanSEXP, SEXP set_sdSEXP); // fsumCpp // RcppExport SEXP _collapse_fsumCpp(SEXP xSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP wSEXP, SEXP narmSEXP); // fsummCpp // RcppExport SEXP _collapse_fsummCpp(SEXP xSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP wSEXP, SEXP narmSEXP, SEXP dropSEXP); // fsumlCpp // RcppExport SEXP _collapse_fsumlCpp(SEXP xSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP wSEXP, SEXP narmSEXP, SEXP dropSEXP); // fvarsdCpp RcppExport SEXP _collapse_fvarsdCpp(SEXP xSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP gsSEXP, SEXP wSEXP, SEXP narmSEXP, SEXP stable_algoSEXP, SEXP sdSEXP); // fvarsdmCpp RcppExport SEXP _collapse_fvarsdmCpp(SEXP xSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP gsSEXP, SEXP wSEXP, SEXP narmSEXP, SEXP stable_algoSEXP, SEXP sdSEXP, SEXP dropSEXP); // fvarsdlCpp RcppExport SEXP _collapse_fvarsdlCpp(SEXP xSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP gsSEXP, SEXP wSEXP, SEXP narmSEXP, SEXP stable_algoSEXP, SEXP sdSEXP, SEXP dropSEXP); // mrtl RcppExport SEXP _collapse_mrtl(SEXP XSEXP, SEXP namesSEXP, SEXP retSEXP); // mctl RcppExport SEXP _collapse_mctl(SEXP XSEXP, SEXP namesSEXP, SEXP retSEXP); // psmatCpp RcppExport SEXP _collapse_psmatCpp(SEXP xSEXP, SEXP gSEXP, SEXP tSEXP, SEXP transposeSEXP); // qFCpp RcppExport SEXP _collapse_qFCpp(SEXP xSEXP, SEXP orderedSEXP, SEXP na_excludeSEXP, SEXP keep_attrSEXP, SEXP retSEXP); // qGCpp // RcppExport SEXP _collapse_qGCpp(SEXP xSEXP, SEXP sortSEXP, SEXP orderedSEXP, SEXP na_excludeSEXP, SEXP retgrpSEXP); // funiqueCpp RcppExport SEXP _collapse_funiqueCpp(SEXP xSEXP, SEXP sortSEXP); // fdroplevelsCpp RcppExport SEXP _collapse_fdroplevelsCpp(SEXP xSEXP, SEXP check_NASEXP); // setAttributes // SEXP _collapse_setAttributes(SEXP xSEXP, SEXP aSEXP); // setattributes // SEXP _collapse_setattributes(SEXP xSEXP, SEXP aSEXP); // setAttr // RcppExport SEXP _collapse_setAttr(SEXP xSEXP, SEXP aSEXP, SEXP vSEXP); // setattr // SEXP _collapse_setattr(SEXP xSEXP, SEXP aSEXP, SEXP vSEXP); // duplAttributes // SEXP _collapse_duplAttributes(SEXP xSEXP, SEXP ySEXP); // duplattributes // SEXP _collapse_duplattributes(SEXP xSEXP, SEXP ySEXP); // cond_duplAttributes // SEXP _collapse_cond_duplAttributes(SEXP xSEXP, SEXP ySEXP); // cond_duplattributes // RcppExport SEXP _collapse_cond_duplattributes(SEXP xSEXP, SEXP ySEXP); // seqid RcppExport SEXP _collapse_seqid(SEXP xSEXP, SEXP oSEXP, SEXP delSEXP, SEXP startSEXP, SEXP na_skipSEXP, SEXP skip_seqSEXP, SEXP check_oSEXP); // groupid RcppExport SEXP _collapse_groupid(SEXP xSEXP, SEXP oSEXP, SEXP startSEXP, SEXP na_skipSEXP, SEXP check_oSEXP); collapse/src/stats_pacf.c0000644000176200001440000000365213672002162015143 0ustar liggesusers/* R : A Computer Language for Statistical Data Analysis * * Copyright (C) 1999-2016 The R Core Team * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, a copy is available at * https://www.R-project.org/Licenses/. */ // #ifdef HAVE_CONFIG_H // # include // #endif // #include "data.table.h" #include #include #include // #include // #include "ts.h" /* cor is the autocorrelations starting from 0 lag*/ static void uni_pacf(double *cor, double *p, int nlag) { double a, b, c, *v, *w; v = (double*) R_alloc(nlag, sizeof(double)); w = (double*) R_alloc(nlag, sizeof(double)); w[0] = p[0] = cor[1]; for(int ll = 1; ll < nlag; ll++) { a = cor[ll+1]; b = 1.0; for(int i = 0; i < ll; i++) { a -= w[i] * cor[ll - i]; b -= w[i] * cor[i + 1]; } p[ll] = c = a/b; if(ll+1 == nlag) break; w[ll] = c; for(int i = 0; i < ll; i++) v[ll-i-1] = w[i]; for(int i = 0; i < ll; i++) w[i] -= c*v[i]; } } SEXP pacf1(SEXP acf, SEXP lmax) { int lagmax = asInteger(lmax); acf = PROTECT(coerceVector(acf, REALSXP)); SEXP ans = PROTECT(allocVector(REALSXP, lagmax)); uni_pacf(REAL(acf), REAL(ans), lagmax); SEXP d = PROTECT(allocVector(INTSXP, 3)); INTEGER(d)[0] = lagmax; INTEGER(d)[1] = INTEGER(d)[2] = 1; setAttrib(ans, R_DimSymbol, d); UNPROTECT(3); return ans; } collapse/src/fmin_fmax.c0000644000176200001440000002232514066670142014765 0ustar liggesusers#include "collapse_c.h" // #include void fmin_double_impl(double *pout, double *px, int ng, int *pg, int narm, int l) { if(ng == 0) { double min; if(narm) { int j = l-1; min = px[j]; while(ISNAN(min) && j!=0) min = px[--j]; if(j != 0) for(int i = j; i--; ) { if(min > px[i]) min = px[i]; } } else { min = px[0]; for(int i = 0; i != l; ++i) { if(ISNAN(px[i])) { min = px[i]; break; } else { if(min > px[i]) min = px[i]; } } } pout[0] = min; } else { if(narm) { for(int i = ng; i--; ) pout[i] = NA_REAL; // Other way ? --pout; for(int i = l; i--; ) if(pout[pg[i]] > px[i] || ISNAN(pout[pg[i]])) pout[pg[i]] = px[i]; // fastest } else { for(int i = ng; i--; ) pout[i] = DBL_MAX; --pout; for(int i = l; i--; ) if(pout[pg[i]] > px[i] || ISNAN(px[i])) pout[pg[i]] = px[i]; // Used to stop loop when all groups passed with NA, but probably no speed gain since groups are mostly ordered. } } } void fmin_int_impl(int *pout, int *px, int ng, int *pg, int narm, int l) { if(ng == 0) { int min; if(narm) { int j = l-1; min = px[j]; while(min == NA_INTEGER && j!=0) min = px[--j]; if(j != 0) for(int i = j; i--; ) { if(min > px[i] && px[i] != NA_INTEGER) min = px[i]; } } else { min = px[0]; for(int i = 0; i != l; ++i) { if(px[i] == NA_INTEGER) { min = NA_INTEGER; break; } else { if(min > px[i]) min = px[i]; } } } pout[0] = min; } else { if(narm) { for(int i = ng; i--; ) pout[i] = NA_INTEGER; --pout; for(int i = l; i--; ) if(px[i] != NA_INTEGER && (pout[pg[i]] > px[i] || pout[pg[i]] == NA_INTEGER)) pout[pg[i]] = px[i]; // fastest?? } else { for(int i = ng; i--; ) pout[i] = INT_MAX; --pout; for(int i = l; i--; ) if(pout[pg[i]] > px[i]) pout[pg[i]] = px[i]; } } } void fmax_double_impl(double *pout, double *px, int ng, int *pg, int narm, int l) { if(ng == 0) { double max; if(narm) { int j = l-1; max = px[j]; while(ISNAN(max) && j!=0) max = px[--j]; if(j != 0) for(int i = j; i--; ) { if(max < px[i]) max = px[i]; } } else { max = px[0]; for(int i = 0; i != l; ++i) { if(ISNAN(px[i])) { max = px[i]; break; } else { if(max < px[i]) max = px[i]; } } } pout[0] = max; } else { if(narm) { for(int i = ng; i--; ) pout[i] = NA_REAL; // Other way ? --pout; for(int i = l; i--; ) if(pout[pg[i]] < px[i] || ISNAN(pout[pg[i]])) pout[pg[i]] = px[i]; // fastest } else { for(int i = ng; i--; ) pout[i] = DBL_MIN; --pout; for(int i = l; i--; ) if(pout[pg[i]] < px[i] || ISNAN(px[i])) pout[pg[i]] = px[i]; // Used to stop loop when all groups passed with NA, but probably no speed gain since groups are mostly ordered. } } } void fmax_int_impl(int *pout, int *px, int ng, int *pg, int narm, int l) { if(ng == 0) { int max; if(narm) { max = NA_INTEGER; // same as INT_MIN for(int i = l; i--; ) if(max < px[i]) max = px[i]; } else { max = px[0]; for(int i = 0; i != l; ++i) { if(px[i] == NA_INTEGER) { max = NA_INTEGER; break; } else { if(max < px[i]) max = px[i]; } } } pout[0] = max; } else { if(narm) { for(int i = ng; i--; ) pout[i] = NA_INTEGER; --pout; for(int i = l; i--; ) if(pout[pg[i]] < px[i]) pout[pg[i]] = px[i]; // fastest?? } else { for(int i = ng; i--; ) pout[i] = INT_MIN + 1; // best ?? --pout; for(int i = l; i--; ) if(px[i] == NA_INTEGER || (pout[pg[i]] != NA_INTEGER && pout[pg[i]] < px[i])) pout[pg[i]] = px[i]; } } } SEXP fminC(SEXP x, SEXP Rng, SEXP g, SEXP Rnarm) { int l = length(x), tx = TYPEOF(x), ng = asInteger(Rng), narm = asLogical(Rnarm); if (l < 1) return x; // Prevents seqfault for numeric(0) #101 if(ng && l != length(g)) error("length(g) must match length(x)"); if(tx == LGLSXP) tx = INTSXP; // ALTREP methods for compact sequences: not safe yet and not part of the API. // if(ALTREP(x) && ng == 0) { // if(tx == INTSXP) return ALTINTEGER_MIN(x, (Rboolean)narm); // if(tx == REALSXP) return ALTREAL_MIN(x, (Rboolean)narm); // error("ALTREP object must be integer or real typed"); // } SEXP out = PROTECT(allocVector(tx, ng == 0 ? 1 : ng)); switch(tx) { case REALSXP: fmin_double_impl(REAL(out), REAL(x), ng, INTEGER(g), narm, l); break; case INTSXP: fmin_int_impl(INTEGER(out), INTEGER(x), ng, INTEGER(g), narm, l); break; default: error("Unsupported SEXP type"); } if(ng && !isObject(x)) copyMostAttrib(x, out); UNPROTECT(1); return out; } SEXP fminmC(SEXP x, SEXP Rng, SEXP g, SEXP Rnarm, SEXP Rdrop) { SEXP dim = getAttrib(x, R_DimSymbol); if(isNull(dim)) error("x is not a matrix"); int tx = TYPEOF(x), l = INTEGER(dim)[0], col = INTEGER(dim)[1], *pg = INTEGER(g), ng = asInteger(Rng), ng1 = ng == 0 ? 1 : ng, narm = asLogical(Rnarm); if (l < 1) return x; // Prevents seqfault for numeric(0) #101 if(ng && l != length(g)) error("length(g) must match nrow(x)"); if(tx == LGLSXP) tx = INTSXP; SEXP out = PROTECT(allocVector(tx, ng == 0 ? col : col * ng)); switch(tx) { case REALSXP: { double *px = REAL(x), *pout = REAL(out); for(int j = 0; j != col; ++j) fmin_double_impl(pout + j*ng1, px + j*l, ng, pg, narm, l); break; } case INTSXP: { int *px = INTEGER(x), *pout = INTEGER(out); for(int j = 0; j != col; ++j) fmin_int_impl(pout + j*ng1, px + j*l, ng, pg, narm, l); break; } default: error("Unsupported SEXP type"); } matCopyAttr(out, x, Rdrop, ng); UNPROTECT(1); return out; } SEXP fminlC(SEXP x, SEXP Rng, SEXP g, SEXP Rnarm, SEXP Rdrop) { int l = length(x), ng = asInteger(Rng); if(l < 1) return x; // needed ?? if(ng == 0 && asLogical(Rdrop)) { SEXP out = PROTECT(allocVector(REALSXP, l)), *px = SEXPPTR(x); double *pout = REAL(out); for(int j = 0; j != l; ++j) pout[j] = asReal(fminC(px[j], Rng, g, Rnarm)); setAttrib(out, R_NamesSymbol, getAttrib(x, R_NamesSymbol)); UNPROTECT(1); return out; } SEXP out = PROTECT(allocVector(VECSXP, l)), *pout = SEXPPTR(out), *px = SEXPPTR(x); for(int j = 0; j != l; ++j) pout[j] = fminC(px[j], Rng, g, Rnarm); if(ng == 0) for(int j = 0; j != l; ++j) copyMostAttrib(px[j], pout[j]); DFcopyAttr(out, x, ng); UNPROTECT(1); return out; } SEXP fmaxC(SEXP x, SEXP Rng, SEXP g, SEXP Rnarm) { int l = length(x), tx = TYPEOF(x), ng = asInteger(Rng), narm = asLogical(Rnarm); if (l < 1) return x; // Prevents seqfault for numeric(0) #101 if(ng && l != length(g)) error("length(g) must match length(x)"); if(tx == LGLSXP) tx = INTSXP; // ALTREP methods for compact sequences: not safe yet and not part of the API. // if(ALTREP(x) && ng == 0) { // if(tx == INTSXP) return ALTINTEGER_MAX(x, (Rboolean)narm); // if(tx == REALSXP) return ALTREAL_MAX(x, (Rboolean)narm); // error("ALTREP object must be integer or real typed"); // } SEXP out = PROTECT(allocVector(tx, ng == 0 ? 1 : ng)); switch(tx) { case REALSXP: fmax_double_impl(REAL(out), REAL(x), ng, INTEGER(g), narm, l); break; case INTSXP: fmax_int_impl(INTEGER(out), INTEGER(x), ng, INTEGER(g), narm, l); break; default: error("Unsupported SEXP type"); } if(ng && !isObject(x)) copyMostAttrib(x, out); UNPROTECT(1); return out; } SEXP fmaxmC(SEXP x, SEXP Rng, SEXP g, SEXP Rnarm, SEXP Rdrop) { SEXP dim = getAttrib(x, R_DimSymbol); if(isNull(dim)) error("x is not a matrix"); int tx = TYPEOF(x), l = INTEGER(dim)[0], col = INTEGER(dim)[1], *pg = INTEGER(g), ng = asInteger(Rng), ng1 = ng == 0 ? 1 : ng, narm = asLogical(Rnarm); if (l < 1) return x; // Prevents seqfault for numeric(0) #101 if(ng && l != length(g)) error("length(g) must match nrow(x)"); if(tx == LGLSXP) tx = INTSXP; SEXP out = PROTECT(allocVector(tx, ng == 0 ? col : col * ng)); switch(tx) { case REALSXP: { double *px = REAL(x), *pout = REAL(out); for(int j = 0; j != col; ++j) fmax_double_impl(pout + j*ng1, px + j*l, ng, pg, narm, l); break; } case INTSXP: { int *px = INTEGER(x), *pout = INTEGER(out); for(int j = 0; j != col; ++j) fmax_int_impl(pout + j*ng1, px + j*l, ng, pg, narm, l); break; } default: error("Unsupported SEXP type"); } matCopyAttr(out, x, Rdrop, ng); UNPROTECT(1); return out; } SEXP fmaxlC(SEXP x, SEXP Rng, SEXP g, SEXP Rnarm, SEXP Rdrop) { int l = length(x), ng = asInteger(Rng); if(l < 1) return x; // needed ?? if(ng == 0 && asLogical(Rdrop)) { SEXP out = PROTECT(allocVector(REALSXP, l)), *px = SEXPPTR(x); double *pout = REAL(out); for(int j = 0; j != l; ++j) pout[j] = asReal(fmaxC(px[j], Rng, g, Rnarm)); setAttrib(out, R_NamesSymbol, getAttrib(x, R_NamesSymbol)); UNPROTECT(1); return out; } SEXP out = PROTECT(allocVector(VECSXP, l)), *pout = SEXPPTR(out), *px = SEXPPTR(x); for(int j = 0; j != l; ++j) pout[j] = fmaxC(px[j], Rng, g, Rnarm); if(ng == 0) for(int j = 0; j != l; ++j) copyMostAttrib(px[j], pout[j]); DFcopyAttr(out, x, ng); UNPROTECT(1); return out; } collapse/src/stats_mAR.c0000644000176200001440000004016413672002744014716 0ustar liggesusers/* * Copyright (C) 1999 Martyn Plummer * Copyright (C) 1999-2016 The R Core Team * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, a copy is available at * https://www.R-project.org/Licenses/. */ #include #include // #include "data.table.h" #include #include #include #include // #include #include /* Fortran routines */ // #include "ts.h" // #include "stats.h" #define MAX_DIM_LENGTH 4 #define VECTOR(x) (x.vec) #define MATRIX(x) (x.mat) #define ARRAY1(x) (x.vec) #define ARRAY2(x) (x.mat) #define ARRAY3(x) (x.arr3) #define ARRAY4(x) (x.arr4) #define DIM(x) (x.dim) #define NROW(x) (x.dim[0]) #define NCOL(x) (x.dim[1]) #define DIM_LENGTH(x) (x.ndim) typedef struct array { double *vec; double **mat; double ***arr3; double ****arr4; int dim[MAX_DIM_LENGTH]; int ndim; } Array; static Array make_array(double vec[], int dim[], int ndim); static Array make_zero_array(int dim[], int ndim); static Array make_zero_matrix(int nrow, int ncol); static Array make_identity_matrix(int n); static Array subarray(Array a, int index); static int vector_length(Array a); static void set_array_to_zero(Array arr); static void copy_array (Array orig, Array ans); static void array_op(Array arr1, Array arr2, char op, Array ans); static void transpose_matrix(Array mat, Array ans); static void matrix_prod(Array mat1, Array mat2, int trans1, int trans2, Array ans); /* Functions for dynamically allocating arrays The Array structure contains pointers to arrays which are allocated using the R_alloc function. Although the .C() interface cleans up all memory assigned with R_alloc, judicious use of vmaxget() vmaxset() to free this memory is probably wise. See memory.c in R core. */ static void assert(bool bla) { if(!bla) error("assert failed in src/library/ts/src/carray.c"); } static Array init_array(void) { int i; Array a; /* Initialize everything to zero. Useful for debugging */ ARRAY1(a) = (double *) '\0'; ARRAY2(a) = (double **) '\0'; ARRAY3(a) = (double ***) '\0'; ARRAY4(a) = (double ****) '\0'; for (i = 0; i < MAX_DIM_LENGTH; i++) DIM(a)[i] = 0; DIM_LENGTH(a) = 0; return a; } static int vector_length(Array a) { int i, len; for (i = 0, len = 1; i < DIM_LENGTH(a); i++) { len *= DIM(a)[i]; } return len; } static Array make_array(double vec[], int dim[], int ndim) { int d, i, j; int len[MAX_DIM_LENGTH + 1]; Array a; assert(ndim <= MAX_DIM_LENGTH); a = init_array(); len[ndim] = 1; for (d = ndim; d >= 1; d--) { len[d-1] = len[d] * dim[ndim - d]; } for (d = 1; d <= ndim; d++) { switch(d) { case 1: VECTOR(a) = vec; break; case 2: ARRAY2(a) = (double**) R_alloc(len[2 - 1],sizeof(double*)); for(i = 0, j = 0; i < len[2 - 1]; i++, j+=dim[ndim - 2 + 1]) { ARRAY2(a)[i] = ARRAY1(a) + j; } break; case 3: ARRAY3(a) = (double***) R_alloc(len[3 - 1],sizeof(double**)); for(i = 0, j = 0; i < len[3 - 1]; i++, j+=dim[ndim - 3 + 1]) { ARRAY3(a)[i] = ARRAY2(a) + j; } break; case 4: ARRAY4(a) = (double****) R_alloc(len[4 - 1],sizeof(double***)); for(i = 0, j = 0; i < len[4 - 1]; i++, j+=dim[ndim - 4 + 1]) { ARRAY4(a)[i] = ARRAY3(a) + j; } break; default: break; } } for (i = 0; i < ndim; i++) { DIM(a)[i] = dim[i]; } DIM_LENGTH(a) = ndim; return a; } static Array make_zero_array(int dim[], int ndim) { int i; int len; double *vec; for (i = 0, len = 1; i < ndim; i++) { len *= dim[i]; } vec = (double *) R_alloc(len, sizeof(double)); for (i = 0; i < len; i++) { vec[i] = 0.0; } return make_array(vec, dim, ndim); } static Array make_zero_matrix(int nrow, int ncol) { int dim[2]; Array a; dim[0] = nrow; dim[1] = ncol; a = make_zero_array(dim, 2); return a; } static Array subarray(Array a, int index) /* Return subarray of array a in the form of an Array structure so it can be manipulated by other functions NB The data are not copied, so any changes made to the subarray will affect the original array. */ { int i, offset; Array b; b = init_array(); /* is index in range? */ assert( index >= 0 && index < DIM(a)[0] ); offset = index; switch(DIM_LENGTH(a)) { /* NB Falling through here */ case 4: offset *= DIM(a)[DIM_LENGTH(a) - 4 + 1]; ARRAY3(b) = ARRAY3(a) + offset; case 3: offset *= DIM(a)[DIM_LENGTH(a) - 3 + 1]; ARRAY2(b) = ARRAY2(a) + offset; case 2: offset *= DIM(a)[DIM_LENGTH(a) - 2 + 1]; ARRAY1(b) = ARRAY1(a) + offset; break; default: break; } DIM_LENGTH(b) = DIM_LENGTH(a) - 1; for (i = 0; i < DIM_LENGTH(b); i++) DIM(b)[i] = DIM(a)[i+1]; return b; } static int test_array_conform(Array a1, Array a2) { int i, ans = FALSE; if (DIM_LENGTH(a1) != DIM_LENGTH(a2)) return FALSE; else for (i = 0; i < DIM_LENGTH(a1); i++) { if (DIM(a1)[i] == DIM(a2)[i]) ans = TRUE; else return FALSE; } return ans; } static void copy_array (Array orig, Array ans) /* copy matrix orig to ans */ { int i; assert (test_array_conform(orig, ans)); for(i = 0; i < vector_length(orig); i++) VECTOR(ans)[i] = VECTOR(orig)[i]; } static void transpose_matrix(Array mat, Array ans) { int i,j; const void *vmax; Array tmp; tmp = init_array(); assert(DIM_LENGTH(mat) == 2 && DIM_LENGTH(ans) == 2); assert(NCOL(mat) == NROW(ans)); assert(NROW(mat) == NCOL(ans)); vmax = vmaxget(); tmp = make_zero_matrix(NROW(ans), NCOL(ans)); for(i = 0; i < NROW(mat); i++) for(j = 0; j < NCOL(mat); j++) MATRIX(tmp)[j][i] = MATRIX(mat)[i][j]; copy_array(tmp, ans); vmaxset(vmax); } static void array_op(Array arr1, Array arr2, char op, Array ans) /* Element-wise array operations */ { int i; assert (test_array_conform(arr1, arr2)); assert (test_array_conform(arr2, ans)); switch (op) { case '*': for (i = 0; i < vector_length(ans); i++) VECTOR(ans)[i] = VECTOR(arr1)[i] * VECTOR(arr2)[i]; break; case '+': for (i = 0; i < vector_length(ans); i++) VECTOR(ans)[i] = VECTOR(arr1)[i] + VECTOR(arr2)[i]; break; case '/': for (i = 0; i < vector_length(ans); i++) VECTOR(ans)[i] = VECTOR(arr1)[i] / VECTOR(arr2)[i]; break; case '-': for (i = 0; i < vector_length(ans); i++) VECTOR(ans)[i] = VECTOR(arr1)[i] - VECTOR(arr2)[i]; break; default: error("Unknown op in array_op"); // printf } } static void matrix_prod(Array mat1, Array mat2, int trans1, int trans2, Array ans) /* General matrix product between mat1 and mat2. Put answer in ans. trans1 and trans2 are logical flags which indicate if the matrix is to be transposed. Normal matrix multiplication has trans1 = trans2 = 0. */ { int i,j,k,K1,K2; const void *vmax; double m1, m2; Array tmp; /* Test whether everything is a matrix */ assert(DIM_LENGTH(mat1) == 2 && DIM_LENGTH(mat2) == 2 && DIM_LENGTH(ans) == 2); /* Test whether matrices conform. K is the dimension that is lost by multiplication */ if (trans1) { assert ( NCOL(mat1) == NROW(ans) ); K1 = NROW(mat1); } else { assert ( NROW(mat1) == NROW(ans) ); K1 = NCOL(mat1); } if (trans2) { assert ( NROW(mat2) == NCOL(ans) ); K2 = NCOL(mat2); } else { assert ( NCOL(mat2) == NCOL(ans) ); K2 = NROW(mat2); } assert (K1 == K2); tmp = init_array(); /* In case ans is the same as mat1 or mat2, we create a temporary matrix to hold the answer, then copy it to ans */ vmax = vmaxget(); tmp = make_zero_matrix(NROW(ans), NCOL(ans)); for (i = 0; i < NROW(tmp); i++) { for (j = 0; j < NCOL(tmp); j++) { for(k = 0; k < K1; k++) { m1 = (trans1) ? MATRIX(mat1)[k][i] : MATRIX(mat1)[i][k]; m2 = (trans2) ? MATRIX(mat2)[j][k] : MATRIX(mat2)[k][j]; MATRIX(tmp)[i][j] += m1 * m2; } } } copy_array(tmp, ans); vmaxset(vmax); } static void set_array_to_zero(Array arr) { int i; for (i = 0; i < vector_length(arr); i++) VECTOR(arr)[i] = 0.0; } static Array make_identity_matrix(int n) { int i; Array a; a = make_zero_matrix(n,n); for(i = 0; i < n; i++) MATRIX(a)[i][i] = 1.0; return a; } static void qr_solve(Array x, Array y, Array coef) /* Translation of the R function qr.solve into pure C NB We have to transpose the matrices since the ordering of an array is different in Fortran NB2 We have to copy x to avoid it being overwritten. */ { int i, info = 0, rank, *pivot, n, p; const void *vmax; double tol = 1.0E-7, *qraux, *work; Array xt, yt, coeft; assert(NROW(x) == NROW(y)); assert(NCOL(coef) == NCOL(y)); assert(NCOL(x) == NROW(coef)); vmax = vmaxget(); qraux = (double *) R_alloc(NCOL(x), sizeof(double)); pivot = (int *) R_alloc(NCOL(x), sizeof(int)); work = (double *) R_alloc(2*NCOL(x), sizeof(double)); for(i = 0; i < NCOL(x); i++) pivot[i] = i+1; xt = make_zero_matrix(NCOL(x), NROW(x)); transpose_matrix(x,xt); n = NROW(x); p = NCOL(x); F77_CALL(dqrdc2)(VECTOR(xt), &n, &n, &p, &tol, &rank, qraux, pivot, work); if (rank != p) error("Singular matrix in qr_solve"); yt = make_zero_matrix(NCOL(y), NROW(y)); coeft = make_zero_matrix(NCOL(coef), NROW(coef)); transpose_matrix(y, yt); F77_CALL(dqrcf)(VECTOR(xt), &NROW(x), &rank, qraux, yt.vec, &NCOL(y), coeft.vec, &info); transpose_matrix(coeft,coef); vmaxset(vmax); } static double ldet(Array x) /* Log determinant of square matrix */ { int i, rank, *pivot, n, p; const void *vmax; double ll, tol = 1.0E-7, *qraux, *work; Array xtmp; assert(DIM_LENGTH(x) == 2); /* is x a matrix? */ assert(NROW(x) == NCOL(x)); /* is x square? */ vmax = vmaxget(); qraux = (double *) R_alloc(NCOL(x), sizeof(double)); pivot = (int *) R_alloc(NCOL(x), sizeof(int)); work = (double *) R_alloc(2*NCOL(x), sizeof(double)); xtmp = make_zero_matrix(NROW(x), NCOL(x)); copy_array(x, xtmp); for(i = 0; i < NCOL(x); i++) pivot[i] = i+1; p = n = NROW(x); F77_CALL(dqrdc2)(VECTOR(xtmp), &n, &n, &p, &tol, &rank, qraux, pivot, work); if (rank != p) error("Singular matrix in ldet"); for (i = 0, ll=0.0; i < rank; i++) { ll += log(fabs(MATRIX(xtmp)[i][i])); } vmaxset(vmax); return ll; } /* Whittle's algorithm for autoregression estimation multi_yw is the interface to R. It also handles model selection using AIC whittle,whittle2 implement Whittle's recursion for solving the multivariate Yule-Walker equations. Notation resid residuals (forward and backward) A Estimates of forward autocorrelation coefficients B Estimates of backward autocorrelation coefficients EA,EB Prediction Variance KA,KB Partial correlation coefficient */ void multi_yw(double *acf, int *pn, int *pomax, int *pnser, double *coef, double *pacf, double *var, double *aic, int *porder, int *puseaic); static void whittle(Array acf, int nlag, Array *A, Array *B, Array p_forward, Array v_forward, Array p_back, Array v_back); static void whittle2 (Array acf, Array Aold, Array Bold, int lag, char *direction, Array A, Array K, Array E); void multi_yw(double *acf, int *pn, int *pomax, int *pnser, double *coef, double *pacf, double *var, double *aic, int *porder, int *useaic) { int i, m; int omax = *pomax, n = *pn, nser=*pnser, order=*porder; double aicmin; Array acf_array, p_forward, p_back, v_forward, v_back; Array *A, *B; int dim[3]; dim[0] = omax+1; dim[1] = dim[2] = nser; acf_array = make_array(acf, dim, 3); p_forward = make_array(pacf, dim, 3); v_forward = make_array(var, dim, 3); /* Backward equations (discarded) */ p_back= make_zero_array(dim, 3); v_back= make_zero_array(dim, 3); A = (Array *) R_alloc(omax+2, sizeof(Array)); B = (Array *) R_alloc(omax+2, sizeof(Array)); for (i = 0; i <= omax; i++) { A[i] = make_zero_array(dim, 3); B[i] = make_zero_array(dim, 3); } whittle(acf_array, omax, A, B, p_forward, v_forward, p_back, v_back); /* Model order selection */ for (m = 0; m <= omax; m++) { aic[m] = n * ldet(subarray(v_forward,m)) + 2 * m * nser * nser; } if (*useaic) { order = 0; aicmin = aic[0]; for (m = 0; m <= omax; m++) { if (aic[m] < aicmin) { aicmin = aic[m]; order = m; } } } else order = omax; *porder = order; for(i = 0; i < vector_length(A[order]); i++) coef[i] = VECTOR(A[order])[i]; } static void whittle(Array acf, int nlag, Array *A, Array *B, Array p_forward, Array v_forward, Array p_back, Array v_back) { int lag, nser = DIM(acf)[1]; const void *vmax; Array EA, EB; /* prediction variance */ Array KA, KB; /* partial correlation coefficient */ Array id, tmp; vmax = vmaxget(); KA = make_zero_matrix(nser, nser); EA = make_zero_matrix(nser, nser); KB = make_zero_matrix(nser, nser); EB = make_zero_matrix(nser, nser); id = make_identity_matrix(nser); copy_array(id, subarray(A[0],0)); copy_array(id, subarray(B[0],0)); copy_array(id, subarray(p_forward,0)); copy_array(id, subarray(p_back,0)); for (lag = 1; lag <= nlag; lag++) { whittle2(acf, A[lag-1], B[lag-1], lag, "forward", A[lag], KA, EB); whittle2(acf, B[lag-1], A[lag-1], lag, "back", B[lag], KB, EA); copy_array(EA, subarray(v_forward,lag-1)); copy_array(EB, subarray(v_back,lag-1)); copy_array(KA, subarray(p_forward,lag)); copy_array(KB, subarray(p_back,lag)); } tmp = make_zero_matrix(nser,nser); matrix_prod(KB,KA, 1, 1, tmp); array_op(id, tmp, '-', tmp); matrix_prod(EA, tmp, 0, 0, subarray(v_forward, nlag)); vmaxset(vmax); } static void whittle2 (Array acf, Array Aold, Array Bold, int lag, char *direction, Array A, Array K, Array E) { int d, i, nser=DIM(acf)[1]; const void *vmax; Array beta, tmp, id; d = strcmp(direction, "forward") == 0; vmax = vmaxget(); beta = make_zero_matrix(nser,nser); tmp = make_zero_matrix(nser, nser); id = make_identity_matrix(nser); set_array_to_zero(E); copy_array(id, subarray(A,0)); for(i = 0; i < lag; i++) { matrix_prod(subarray(acf,lag - i), subarray(Aold,i), d, 1, tmp); array_op(beta, tmp, '+', beta); matrix_prod(subarray(acf,i), subarray(Bold,i), d, 1, tmp); array_op(E, tmp, '+', E); } qr_solve(E, beta, K); transpose_matrix(K,K); for (i = 1; i <= lag; i++) { matrix_prod(K, subarray(Bold,lag - i), 0, 0, tmp); array_op(subarray(Aold,i), tmp, '-', subarray(A,i)); } vmaxset(vmax); } // static const R_CMethodDef CEntries[] = { // {"multi_yw", (DL_FUNC) &multi_yw, 10}, // {NULL, NULL, 0} //}; // void R_init_stat(DllInfo *dll) // { // R_registerRoutines(dll, CEntries, NULL, NULL, NULL); // R_useDynamicSymbols(dll, FALSE); //} collapse/src/TRA.cpp0000644000176200001440000010710614174223734014011 0ustar liggesusers// // [[Rcpp::plugins(cpp11)]] #include using namespace Rcpp; // Cases: // 1- replace // 2- replace with NA rm // 3- demean // 4- demean with global mean added // 5- Proportion // 6- Percentages // 7- Add // 8- Multiply // 9- Modulus // 10- Subtract Modulus // int(x * (1/y)) -> This gave the UBSAN error if NaN !!! inline double mymod(double x, double y) { double z(x * (1/y)); return (z == z) ? x - (int)(z) * y : z; // faster than x - (int)(x/y) * y; // also C-style conversions seem to be faster ? } // #define mymod(x, y) (x - ((int)(x/y) * y)) // Macro: not faster ! // template // constexpr double mymod (T x, U mod) // { // return !mod ? x : x - mod * static_cast(x / mod); // } // int(x * (1/y)) -> This gave the UBSAN error if NaN !!! inline double myremain(double x, double y) { double z(x * (1/y)); return (z == z) ? (int)(z) * y : z; // (int)(x * (1/y)) * y; <- This would be enough, but doesn't keep missing values in x! } SEXP ret1(const SEXP& x, const SEXP& xAG, const SEXP& g) { int tx = TYPEOF(x), txAG = TYPEOF(xAG), l = Rf_length(x), gs = Rf_length(g); if(l < 1) return x; // Prevents seqfault for numeric(0) #101 int *pg; bool nog = gs == 1; if(nog) { if(Rf_length(xAG) != 1) stop("If g = NULL, NROW(STATS) needs to be 1"); } else { if(gs != l) stop("length(g) must match NROW(x)"); pg = INTEGER(g); } SEXP out = PROTECT(Rf_allocVector(txAG, l)); switch(txAG) { case REALSXP: { double *pout = REAL(out); if(nog) { // memset(pout, Rf_asReal(xAG), l * sizeof(double)); memset only works with 0 !! double AG = Rf_asReal(xAG); for(int i = l; i--; ) pout[i] = AG; } else { double *AG = REAL(xAG)-1; for(int i = l; i--; ) pout[i] = AG[pg[i]]; } break; } case INTSXP: { int *pout = INTEGER(out); if(nog) { int AG = Rf_asInteger(xAG); for(int i = l; i--; ) pout[i] = AG; } else { int *AG = INTEGER(xAG)-1; for(int i = l; i--; ) pout[i] = AG[pg[i]]; } break; } case STRSXP: { // CharacterVector AG = xAG; // if(nog) out = CharacterVector(l, String(AG[0])); // else { // CharacterVector pout = out; // for(int i = l; i--; ) pout[i] = AG[pg[i]-1]; // } // break; SEXP *pout = STRING_PTR(out); if(nog) { SEXP AG = Rf_asChar(xAG); for(int i = l; i--; ) pout[i] = AG; // SET_STRING_ELT(out, i, AG); // Without pointer -> much slower! } else { SEXP *AG = STRING_PTR(xAG)-1; for(int i = l; i--; ) pout[i] = AG[pg[i]]; // SET_STRING_ELT(out, i, AG[pg[i]]); // Without pointer -> much slower! } break; } case LGLSXP: { int *pout = LOGICAL(out); if(nog) { int AG = Rf_asLogical(xAG); for(int i = l; i--; ) pout[i] = AG; } else { int *AG = LOGICAL(xAG)-1; for(int i = l; i--; ) pout[i] = AG[pg[i]]; } break; } default: stop("Not supported SEXP type!"); } // Attribute Handling - 4 Situations: // 1 - x is classed (factor, date, time series), xAG is not classed. i.e. vector of fnobs, fmean etc. // -> Sallow replacing, removing class and levels attributes from x, discard attributes of xAG (if any) // -> or (if type matches i.e. double for date or time series), copy attributes of x unless x is a factor // 2 - x is not classed, xAG is classed (factor, date, time series). - an unusual situation should not occurr - copy attributes of xAG, discard attributes of x // 3 - xAG and x are classed - same as above, keep attributes of xAG, discard attributes of x // 4 - neither x nor xAG are classed - preserve attributes of x, discard attributes of xAG (if any) // if(Rf_isObject(xAG)) SHALLOW_DUPLICATE_ATTRIB(out, xAG); else if(!Rf_isObject(x) || (tx == txAG && !Rf_isFactor(x))) SHALLOW_DUPLICATE_ATTRIB(out, x); else { SHALLOW_DUPLICATE_ATTRIB(out, x); Rf_classgets(out, R_NilValue); // OK ! Rf_setAttrib(out, R_LevelsSymbol, R_NilValue); // if(Rf_isFactor(x)) ? faster ? } UNPROTECT(1); return out; } SEXP ret2(const SEXP& x, const SEXP& xAG, const SEXP& g) { int l = Rf_length(x), gs = Rf_length(g), tx = TYPEOF(x), txAG = TYPEOF(xAG); if(l < 1) return x; // Prevents seqfault for numeric(0) #101 int *pg; bool nog = gs == 1; if(nog) { if(Rf_length(xAG) != 1) stop("If g = NULL, NROW(STATS) needs to be 1"); } else { if(gs != l) stop("length(g) must match NROW(x)"); pg = INTEGER(g); // Wmaybe uninitialized } SEXP out = PROTECT(Rf_allocVector(txAG, l)); switch(tx) { case REALSXP: { double *px = REAL(x); switch(txAG) { case REALSXP: { double *pout = REAL(out); if(nog) { double AG = Rf_asReal(xAG); for(int i = l; i--; ) pout[i] = (std::isnan(px[i])) ? NA_REAL : AG; } else { double *AG = REAL(xAG)-1; for(int i = l; i--; ) pout[i] = (std::isnan(px[i])) ? NA_REAL : AG[pg[i]]; } break; } case INTSXP: { int *pout = INTEGER(out); if(nog) { int AG = Rf_asInteger(xAG); for(int i = l; i--; ) pout[i] = (std::isnan(px[i])) ? NA_INTEGER : AG; } else { int *AG = INTEGER(xAG)-1; for(int i = l; i--; ) pout[i] = (std::isnan(px[i])) ? NA_INTEGER : AG[pg[i]]; } break; } case STRSXP: { SEXP *pout = STRING_PTR(out); if(nog) { SEXP AG = Rf_asChar(xAG); for(int i = l; i--; ) pout[i] = (std::isnan(px[i])) ? NA_STRING : AG; } else { SEXP *AG = STRING_PTR(xAG)-1; for(int i = l; i--; ) pout[i] = (std::isnan(px[i])) ? NA_STRING : AG[pg[i]]; } break; } case LGLSXP: { int *pout = LOGICAL(out); if(nog) { int AG = Rf_asLogical(xAG); for(int i = l; i--; ) pout[i] = (std::isnan(px[i])) ? NA_LOGICAL : AG; } else { int *AG = LOGICAL(xAG)-1; for(int i = l; i--; ) pout[i] = (std::isnan(px[i])) ? NA_LOGICAL : AG[pg[i]]; } break; } default: stop("Not supported SEXP type!"); } break; } case INTSXP: { int *px = INTEGER(x); switch(txAG) { case REALSXP: { double *pout = REAL(out); if(nog) { double AG = Rf_asReal(xAG); for(int i = l; i--; ) pout[i] = (px[i] == NA_INTEGER) ? NA_REAL : AG; } else { double *AG = REAL(xAG)-1; for(int i = l; i--; ) pout[i] = (px[i] == NA_INTEGER) ? NA_REAL : AG[pg[i]]; } break; } case INTSXP: { int *pout = INTEGER(out); if(nog) { int AG = Rf_asInteger(xAG); for(int i = l; i--; ) pout[i] = (px[i] == NA_INTEGER) ? NA_INTEGER : AG; } else { int *AG = INTEGER(xAG)-1; for(int i = l; i--; ) pout[i] = (px[i] == NA_INTEGER) ? NA_INTEGER : AG[pg[i]]; } break; } case STRSXP: { SEXP *pout = STRING_PTR(out); if(nog) { SEXP AG = Rf_asChar(xAG); for(int i = l; i--; ) pout[i] = (px[i] == NA_INTEGER) ? NA_STRING : AG; } else { SEXP *AG = STRING_PTR(xAG)-1; for(int i = l; i--; ) pout[i] = (px[i] == NA_INTEGER) ? NA_STRING : AG[pg[i]]; } break; } case LGLSXP: { int *pout = LOGICAL(out); if(nog) { int AG = Rf_asLogical(xAG); for(int i = l; i--; ) pout[i] = (px[i] == NA_INTEGER) ? NA_LOGICAL : AG; } else { int *AG = LOGICAL(xAG)-1; for(int i = l; i--; ) pout[i] = (px[i] == NA_INTEGER) ? NA_LOGICAL : AG[pg[i]]; } break; } default: stop("Not supported SEXP type!"); } break; } case STRSXP: { SEXP *px = STRING_PTR(x); switch(txAG) { case REALSXP: { double *pout = REAL(out); if(nog) { double AG = Rf_asReal(xAG); for(int i = l; i--; ) pout[i] = (px[i] == NA_STRING) ? NA_REAL : AG; } else { double *AG = REAL(xAG)-1; for(int i = l; i--; ) pout[i] = (px[i] == NA_STRING) ? NA_REAL : AG[pg[i]]; } break; } case INTSXP: { int *pout = INTEGER(out); if(nog) { int AG = Rf_asInteger(xAG); for(int i = l; i--; ) pout[i] = (px[i] == NA_STRING) ? NA_INTEGER : AG; } else { int *AG = INTEGER(xAG)-1; for(int i = l; i--; ) pout[i] = (px[i] == NA_STRING) ? NA_INTEGER : AG[pg[i]]; } break; } case STRSXP: { SEXP *pout = STRING_PTR(out); if(nog) { SEXP AG = Rf_asChar(xAG); for(int i = l; i--; ) pout[i] = (px[i] == NA_STRING) ? NA_STRING : AG; } else { SEXP *AG = STRING_PTR(xAG)-1; for(int i = l; i--; ) pout[i] = (px[i] == NA_STRING) ? NA_STRING : AG[pg[i]]; } break; } case LGLSXP: { int *pout = LOGICAL(out); if(nog) { int AG = Rf_asLogical(xAG); for(int i = l; i--; ) pout[i] = (px[i] == NA_STRING) ? NA_LOGICAL : AG; } else { int *AG = LOGICAL(xAG)-1; for(int i = l; i--; ) pout[i] = (px[i] == NA_STRING) ? NA_LOGICAL : AG[pg[i]]; } break; } default: stop("Not supported SEXP type!"); } break; } case LGLSXP: { int *px = LOGICAL(x); switch(txAG) { case REALSXP: { double *pout = REAL(out); if(nog) { double AG = Rf_asReal(xAG); for(int i = l; i--; ) pout[i] = (px[i] == NA_LOGICAL) ? NA_REAL : AG; } else { double *AG = REAL(xAG)-1; for(int i = l; i--; ) pout[i] = (px[i] == NA_LOGICAL) ? NA_REAL : AG[pg[i]]; } break; } case INTSXP: { int *pout = INTEGER(out); if(nog) { int AG = Rf_asInteger(xAG); for(int i = l; i--; ) pout[i] = (px[i] == NA_LOGICAL) ? NA_INTEGER : AG; } else { int *AG = INTEGER(xAG)-1; for(int i = l; i--; ) pout[i] = (px[i] == NA_LOGICAL) ? NA_INTEGER : AG[pg[i]]; } break; } case STRSXP: { SEXP *pout = STRING_PTR(out); if(nog) { SEXP AG = Rf_asChar(xAG); for(int i = l; i--; ) pout[i] = (px[i] == NA_LOGICAL) ? NA_STRING : AG; } else { SEXP *AG = STRING_PTR(xAG)-1; for(int i = l; i--; ) pout[i] = (px[i] == NA_LOGICAL) ? NA_STRING : AG[pg[i]]; } break; } case LGLSXP: { int *pout = LOGICAL(out); if(nog) { int AG = Rf_asLogical(xAG); for(int i = l; i--; ) pout[i] = (px[i] == NA_LOGICAL) ? NA_LOGICAL : AG; } else { int *AG = LOGICAL(xAG)-1; for(int i = l; i--; ) pout[i] = (px[i] == NA_LOGICAL) ? NA_LOGICAL : AG[pg[i]]; } break; } default: stop("Not supported SEXP type!"); } break; } default: stop("Not supported SEXP type!"); } if(Rf_isObject(xAG)) SHALLOW_DUPLICATE_ATTRIB(out, xAG); else if(!Rf_isObject(x) || (tx == txAG && !Rf_isFactor(x))) SHALLOW_DUPLICATE_ATTRIB(out, x); else { SHALLOW_DUPLICATE_ATTRIB(out, x); Rf_classgets(out, R_NilValue); // OK ! Rf_setAttrib(out, R_LevelsSymbol, R_NilValue); } UNPROTECT(1); return out; } // TODO: allow integer input ?? SEXP retoth(const NumericVector& x, const NumericVector& xAG, const SEXP& g, int ret = 3) { int gs = Rf_length(g), l = x.size(); if(l < 1) return x; // Prevents seqfault for numeric(0) #101 NumericVector out = no_init_vector(l); if(gs == 1) { if(xAG.size() != 1) stop("If g = NULL, STATS needs to be an atomic element!"); double AGx = xAG[0]; switch(ret) { case 3: out = x - AGx; break; case 4: stop("This transformation can only be performed with groups!"); case 5: out = x * (1/AGx); break; case 6: out = x * (100 / AGx); break; case 7: out = x + AGx; break; case 8: out = x * AGx; break; case 9: for(int i = 0; i != l; ++i) out[i] = mymod(x[i], AGx); break; case 10: for(int i = 0; i != l; ++i) out[i] = myremain(x[i], AGx); break; default: stop("Unknown Transformation"); } } else { if(gs != l) stop("length(g) must match nrow(x)"); double *px = REAL(x), *pout = REAL(out), *pAG = REAL(xAG)-1; int *pg = INTEGER(g); switch(ret) { case 3: for(int i = l; i--; ) pout[i] = px[i] - pAG[pg[i]]; break; case 4: { long double OM = 0; // better precision int n = 0; for(int i = l; i--; ) { if(std::isnan(px[i])) pout[i] = px[i]; else { pout[i] = px[i] - pAG[pg[i]]; if(std::isnan(pAG[pg[i]])) continue; // If one AG remained NA, OM becomes NA OM += pAG[pg[i]]; ++n; } } OM = OM / n; out = out + (double)OM; // Fastest ? break; } case 5: for(int i = l; i--; ) pout[i] = px[i] / pAG[pg[i]]; // Fastest ? break; case 6: for(int i = l; i--; ) pout[i] = px[i] * (100 / pAG[pg[i]]); break; case 7: for(int i = l; i--; ) pout[i] = px[i] + pAG[pg[i]]; break; case 8: for(int i = l; i--; ) pout[i] = px[i] * pAG[pg[i]]; break; case 9: for(int i = l; i--; ) pout[i] = mymod(px[i], pAG[pg[i]]); break; case 10: for(int i = l; i--; ) pout[i] = myremain(px[i], pAG[pg[i]]); break; default: stop("Unknown Transformation"); } } SHALLOW_DUPLICATE_ATTRIB(out, x); return out; } // [[Rcpp::export]] SEXP TRACpp(const SEXP& x, const SEXP& xAG, const IntegerVector& g = 0, int ret = 1) { if(ret <= 2) { if(ret == 1) return ret1(x, xAG, g); return ret2(x, xAG, g); } return retoth(x, xAG, g, ret); } // [[Rcpp::export]] List TRAlCpp(const List& x, const SEXP& xAG, const IntegerVector& g = 0, int ret = 1) { int l = x.size(); if(Rf_length(xAG) != l) stop("NCOL(x) must match NCOL(STATS)"); List out(l); switch(TYPEOF(xAG)) { case VECSXP: { List AG = xAG; if(ret == 1) for(int j = l; j--; ) out[j] = ret1(x[j], AG[j], g); else if(ret == 2) for(int j = l; j--; ) out[j] = ret2(x[j], AG[j], g); else for(int j = l; j--; ) out[j] = retoth(x[j], AG[j], g, ret); break; } case REALSXP: { NumericVector AG = xAG; if(ret == 1) for(int j = l; j--; ) out[j] = ret1(x[j], Rf_ScalarReal(AG[j]), g); else if(ret == 2) for(int j = l; j--; ) out[j] = ret2(x[j], Rf_ScalarReal(AG[j]), g); else for(int j = l; j--; ) out[j] = retoth(x[j], Rf_ScalarReal(AG[j]), g, ret); break; } case INTSXP: { IntegerVector AG = xAG; if(ret == 1) for(int j = l; j--; ) out[j] = ret1(x[j], Rf_ScalarInteger(AG[j]), g); else if(ret == 2) for(int j = l; j--; ) out[j] = ret2(x[j], Rf_ScalarInteger(AG[j]), g); else for(int j = l; j--; ) out[j] = retoth(x[j], Rf_ScalarInteger(AG[j]), g, ret); break; } case STRSXP: { CharacterVector AG = xAG; if(ret == 1) for(int j = l; j--; ) out[j] = ret1(x[j], Rf_ScalarString(AG[j]), g); // Rf_ScalarString ? -> Not really necessary, a scalar string is still a SEXP ... else if(ret == 2) for(int j = l; j--; ) out[j] = ret2(x[j], Rf_ScalarString(AG[j]), g); else stop("The requested transformation is not possible with strings"); break; } case LGLSXP: { LogicalVector AG = xAG; if(ret == 1) for(int j = l; j--; ) out[j] = ret1(x[j], Rf_ScalarLogical(AG[j]), g); else if(ret == 2) for(int j = l; j--; ) out[j] = ret2(x[j], Rf_ScalarLogical(AG[j]), g); else stop("The requested transformation is not possible with logical data"); break; } default: stop("Not supported SEXP type!"); } SHALLOW_DUPLICATE_ATTRIB(out, x); return out; } // TODO: "replace" method for matrices is a bit slower than before, but overall pretty good! // [[Rcpp::export]] SEXP TRAmCpp(const SEXP& x, const SEXP& xAG, const IntegerVector& g = 0, int ret = 1) { SEXP dim = Rf_getAttrib(x, R_DimSymbol); if(Rf_isNull(dim)) stop("x is not a matrix"); int tx = TYPEOF(x), txAG = TYPEOF(xAG), gs = g.size(), row = INTEGER(dim)[0], col = INTEGER(dim)[1], *pg = INTEGER(g), ng = 0; bool nog = gs == 1; if(nog) { if(Rf_length(xAG) != col) stop("If g = NULL, NROW(STATS) needs to be 1"); } else { if(gs != row) stop("length(g) must match ncol(x)"); if(Rf_ncols(xAG) != col) stop("ncol(STATS) must match ncol(x)"); ng = Rf_nrows(xAG); } if(ret <= 2) { SEXP out = PROTECT(Rf_allocVector(txAG, row * col)); if(ret == 1) { switch(txAG) { case REALSXP: { double *pout = REAL(out); if(nog) { double *pAG = REAL(xAG); for(int j = 0; j != col; ++j) { int s = j * row, e = s + row; double AGj = pAG[j]; for(int i = s; i != e; ++i) pout[i] = AGj; } } else { for(int j = 0; j != col; ++j) { int s = j * row; double *AG = REAL(xAG) + j * ng - 1; for(int i = 0; i != row; ++i) pout[i + s] = AG[pg[i]]; } } break; } case INTSXP: { int *pout = INTEGER(out); if(nog) { int *pAG = INTEGER(xAG); for(int j = 0; j != col; ++j) { int s = j * row, e = s + row, AGj = pAG[j]; for(int i = s; i != e; ++i) pout[i] = AGj; } } else { for(int j = 0; j != col; ++j) { int s = j * row, *AG = INTEGER(xAG) + j * ng - 1; for(int i = 0; i != row; ++i) pout[i + s] = AG[pg[i]]; } } break; } case STRSXP: { SEXP *pout = STRING_PTR(out); if(nog) { SEXP *pAG = STRING_PTR(xAG); for(int j = 0; j != col; ++j) { int s = j * row, e = s + row; SEXP AGj = pAG[j]; for(int i = s; i != e; ++i) pout[i] = AGj; } } else { for(int j = 0; j != col; ++j) { int s = j * row; SEXP *AG = STRING_PTR(xAG) + j * ng - 1; for(int i = 0; i != row; ++i) pout[i + s] = AG[pg[i]]; } } break; } case LGLSXP: { int *pout = LOGICAL(out); if(nog) { int *pAG = LOGICAL(xAG); for(int j = 0; j != col; ++j) { int s = j * row, e = s + row, AGj = pAG[j]; for(int i = s; i != e; ++i) pout[i] = AGj; } } else { for(int j = 0; j != col; ++j) { int s = j * row, *AG = LOGICAL(xAG) + j * ng - 1; for(int i = 0; i != row; ++i) pout[i + s] = AG[pg[i]]; } } break; } default: stop("Not supported SEXP type!"); } } else { // ret == 2 switch(tx) { case REALSXP: { double *px = REAL(x); switch(txAG) { case REALSXP: { double *pout = REAL(out); if(nog) { double *pAG = REAL(xAG); for(int j = 0; j != col; ++j) { int s = j * row, e = s + row; double AGj = pAG[j]; for(int i = s; i != e; ++i) pout[i] = (std::isnan(px[i])) ? NA_REAL : AGj; } } else { for(int j = 0; j != col; ++j) { int s = j * row; double *AG = REAL(xAG) + j * ng - 1; for(int i = 0; i != row; ++i) pout[i + s] = (std::isnan(px[i + s])) ? NA_REAL : AG[pg[i]]; } } break; } case INTSXP: { int *pout = INTEGER(out); if(nog) { int *pAG = INTEGER(xAG); for(int j = 0; j != col; ++j) { int s = j * row, e = s + row, AGj = pAG[j]; for(int i = s; i != e; ++i) pout[i] = (std::isnan(px[i])) ? NA_INTEGER : AGj; } } else { for(int j = 0; j != col; ++j) { int s = j * row, *AG = INTEGER(xAG) + j * ng - 1; for(int i = 0; i != row; ++i) pout[i + s] = (std::isnan(px[i + s])) ? NA_INTEGER : AG[pg[i]]; } } break; } case STRSXP: { SEXP *pout = STRING_PTR(out); if(nog) { SEXP *pAG = STRING_PTR(xAG); for(int j = 0; j != col; ++j) { int s = j * row, e = s + row; SEXP AGj = pAG[j]; for(int i = s; i != e; ++i) pout[i] = (std::isnan(px[i])) ? NA_STRING : AGj; } } else { for(int j = 0; j != col; ++j) { int s = j * row; SEXP *AG = STRING_PTR(xAG) + j * ng - 1; for(int i = 0; i != row; ++i) pout[i + s] = (std::isnan(px[i + s])) ? NA_STRING : AG[pg[i]]; } } break; } case LGLSXP: { int *pout = LOGICAL(out); if(nog) { int *pAG = LOGICAL(xAG); for(int j = 0; j != col; ++j) { int s = j * row, e = s + row, AGj = pAG[j]; for(int i = s; i != e; ++i) pout[i] = (std::isnan(px[i])) ? NA_LOGICAL : AGj; } } else { for(int j = 0; j != col; ++j) { int s = j * row, *AG = LOGICAL(xAG) + j * ng - 1; for(int i = 0; i != row; ++i) pout[i + s] = (std::isnan(px[i + s])) ? NA_LOGICAL : AG[pg[i]]; } } break; } default: stop("Not supported SEXP type!"); } break; } case INTSXP: { int *px = INTEGER(x); switch(txAG) { case REALSXP: { double *pout = REAL(out); if(nog) { double *pAG = REAL(xAG); for(int j = 0; j != col; ++j) { int s = j * row, e = s + row; double AGj = pAG[j]; for(int i = s; i != e; ++i) pout[i] = (px[i] == NA_INTEGER) ? NA_REAL : AGj; } } else { for(int j = 0; j != col; ++j) { int s = j * row; double *AG = REAL(xAG) + j * ng - 1; for(int i = 0; i != row; ++i) pout[i + s] = (px[i + s] == NA_INTEGER) ? NA_REAL : AG[pg[i]]; } } break; } case INTSXP: { int *pout = INTEGER(out); if(nog) { int *pAG = INTEGER(xAG); for(int j = 0; j != col; ++j) { int s = j * row, e = s + row, AGj = pAG[j]; for(int i = s; i != e; ++i) pout[i] = (px[i] == NA_INTEGER) ? NA_INTEGER : AGj; } } else { for(int j = 0; j != col; ++j) { int s = j * row, *AG = INTEGER(xAG) + j * ng - 1; for(int i = 0; i != row; ++i) pout[i + s] = (px[i + s] == NA_INTEGER) ? NA_INTEGER : AG[pg[i]]; } } break; } case STRSXP: { SEXP *pout = STRING_PTR(out); if(nog) { SEXP *pAG = STRING_PTR(xAG); for(int j = 0; j != col; ++j) { int s = j * row, e = s + row; SEXP AGj = pAG[j]; for(int i = s; i != e; ++i) pout[i] = (px[i] == NA_INTEGER) ? NA_STRING : AGj; } } else { for(int j = 0; j != col; ++j) { int s = j * row; SEXP *AG = STRING_PTR(xAG) + j * ng - 1; for(int i = 0; i != row; ++i) pout[i + s] = (px[i + s] == NA_INTEGER) ? NA_STRING : AG[pg[i]]; } } break; } case LGLSXP: { int *pout = LOGICAL(out); if(nog) { int *pAG = LOGICAL(xAG); for(int j = 0; j != col; ++j) { int s = j * row, e = s + row, AGj = pAG[j]; for(int i = s; i != e; ++i) pout[i] = (px[i] == NA_INTEGER) ? NA_LOGICAL : AGj; } } else { for(int j = 0; j != col; ++j) { int s = j * row, *AG = LOGICAL(xAG) + j * ng - 1; for(int i = 0; i != row; ++i) pout[i + s] = (px[i + s] == NA_INTEGER) ? NA_LOGICAL : AG[pg[i]]; } } break; } default: stop("Not supported SEXP type!"); } break; } case STRSXP: { SEXP *px = STRING_PTR(x); switch(txAG) { case REALSXP: { double *pout = REAL(out); if(nog) { double *pAG = REAL(xAG); for(int j = 0; j != col; ++j) { int s = j * row, e = s + row; double AGj = pAG[j]; for(int i = s; i != e; ++i) pout[i] = (px[i] == NA_STRING) ? NA_REAL : AGj; } } else { for(int j = 0; j != col; ++j) { int s = j * row; double *AG = REAL(xAG) + j * ng - 1; for(int i = 0; i != row; ++i) pout[i + s] = (px[i + s] == NA_STRING) ? NA_REAL : AG[pg[i]]; } } break; } case INTSXP: { int *pout = INTEGER(out); if(nog) { int *pAG = INTEGER(xAG); for(int j = 0; j != col; ++j) { int s = j * row, e = s + row, AGj = pAG[j]; for(int i = s; i != e; ++i) pout[i] = (px[i] == NA_STRING) ? NA_INTEGER : AGj; } } else { for(int j = 0; j != col; ++j) { int s = j * row, *AG = INTEGER(xAG) + j * ng - 1; for(int i = 0; i != row; ++i) pout[i + s] = (px[i + s] == NA_STRING) ? NA_INTEGER : AG[pg[i]]; } } break; } case STRSXP: { SEXP *pout = STRING_PTR(out); if(nog) { SEXP *pAG = STRING_PTR(xAG); for(int j = 0; j != col; ++j) { int s = j * row, e = s + row; SEXP AGj = pAG[j]; for(int i = s; i != e; ++i) pout[i] = (px[i] == NA_STRING) ? NA_STRING : AGj; } } else { for(int j = 0; j != col; ++j) { int s = j * row; SEXP *AG = STRING_PTR(xAG) + j * ng - 1; for(int i = 0; i != row; ++i) pout[i + s] = (px[i + s] == NA_STRING) ? NA_STRING : AG[pg[i]]; } } break; } case LGLSXP: { int *pout = LOGICAL(out); if(nog) { int *pAG = LOGICAL(xAG); for(int j = 0; j != col; ++j) { int s = j * row, e = s + row, AGj = pAG[j]; for(int i = s; i != e; ++i) pout[i] = (px[i] == NA_STRING) ? NA_LOGICAL : AGj; } } else { for(int j = 0; j != col; ++j) { int s = j * row, *AG = LOGICAL(xAG) + j * ng - 1; for(int i = 0; i != row; ++i) pout[i + s] = (px[i + s] == NA_STRING) ? NA_LOGICAL : AG[pg[i]]; } } break; } default: stop("Not supported SEXP type!"); } break; } case LGLSXP: { int *px = LOGICAL(x); switch(txAG) { case REALSXP: { double *pout = REAL(out); if(nog) { double *pAG = REAL(xAG); for(int j = 0; j != col; ++j) { int s = j * row, e = s + row; double AGj = pAG[j]; for(int i = s; i != e; ++i) pout[i] = (px[i] == NA_LOGICAL) ? NA_REAL : AGj; } } else { for(int j = 0; j != col; ++j) { int s = j * row; double *AG = REAL(xAG) + j * ng - 1; for(int i = 0; i != row; ++i) pout[i + s] = (px[i + s] == NA_LOGICAL) ? NA_REAL : AG[pg[i]]; } } break; } case INTSXP: { int *pout = INTEGER(out); if(nog) { int *pAG = INTEGER(xAG); for(int j = 0; j != col; ++j) { int s = j * row, e = s + row, AGj = pAG[j]; for(int i = s; i != e; ++i) pout[i] = (px[i] == NA_LOGICAL) ? NA_INTEGER : AGj; } } else { for(int j = 0; j != col; ++j) { int s = j * row, *AG = INTEGER(xAG) + j * ng - 1; for(int i = 0; i != row; ++i) pout[i + s] = (px[i + s] == NA_LOGICAL) ? NA_INTEGER : AG[pg[i]]; } } break; } case STRSXP: { SEXP *pout = STRING_PTR(out); if(nog) { SEXP *pAG = STRING_PTR(xAG); for(int j = 0; j != col; ++j) { int s = j * row, e = s + row; SEXP AGj = pAG[j]; for(int i = s; i != e; ++i) pout[i] = (px[i] == NA_LOGICAL) ? NA_STRING : AGj; } } else { for(int j = 0; j != col; ++j) { int s = j * row; SEXP *AG = STRING_PTR(xAG) + j * ng - 1; for(int i = 0; i != row; ++i) pout[i + s] = (px[i + s] == NA_LOGICAL) ? NA_STRING : AG[pg[i]]; } } break; } case LGLSXP: { int *pout = LOGICAL(out); if(nog) { int *pAG = LOGICAL(xAG); for(int j = 0; j != col; ++j) { int s = j * row, e = s + row, AGj = pAG[j]; for(int i = s; i != e; ++i) pout[i] = (px[i] == NA_LOGICAL) ? NA_LOGICAL : AGj; } } else { for(int j = 0; j != col; ++j) { int s = j * row, *AG = LOGICAL(xAG) + j * ng - 1; for(int i = 0; i != row; ++i) pout[i + s] = (px[i + s] == NA_LOGICAL) ? NA_LOGICAL : AG[pg[i]]; } } break; } default: stop("Not supported SEXP type!"); } break; } default: stop("Not supported SEXP type!"); } } if(Rf_isObject(xAG)) SHALLOW_DUPLICATE_ATTRIB(out, xAG); else if(!Rf_isObject(x) || (tx == txAG && !Rf_isFactor(x))) SHALLOW_DUPLICATE_ATTRIB(out, x); else { SHALLOW_DUPLICATE_ATTRIB(out, x); Rf_classgets(out, R_NilValue); // OK ! Rf_setAttrib(out, R_LevelsSymbol, R_NilValue); } UNPROTECT(1); return out; } else { // ret > 2 SEXP out = PROTECT(Rf_allocVector(REALSXP, row * col)); double *pout = REAL(out), *px; NumericVector xxAG = xAG; switch(tx) { case REALSXP: case INTSXP: { if(tx == INTSXP) { // TODO: Better solution ! NumericVector xx = x; // Rf_coerceVector(x, REALSXP); px = REAL(xx); } else { px = REAL(x); } switch(ret) { case 3: { if(nog) { double *pAG = REAL(xxAG); for(int j = 0; j != col; ++j) { int s = j * row, e = s + row; double AGj = pAG[j]; for(int i = s; i != e; ++i) pout[i] = px[i] - AGj; } } else { for(int j = 0; j != col; ++j) { int s = j * row; double *AG = REAL(xxAG) + j * ng - 1; for(int i = 0; i != row; ++i) pout[i + s] = px[i + s] - AG[pg[i]]; } } break; } case 4: { if(nog) stop("This transformation can only be computed with groups!"); for(int j = 0; j != col; ++j) { int s = j * row, n = 0; long double OM = 0; double *AG = REAL(xxAG) + j * ng - 1; for(int i = 0; i != row; ++i) { if(std::isnan(px[i + s])) pout[i + s] = px[i + s]; else { pout[i + s] = px[i + s] - AG[pg[i]]; if(std::isnan(AG[pg[i]])) continue; OM += AG[pg[i]]; ++n; } } double OMD = double(OM / n); for(int i = row; i--; ) pout[i + s] += OMD; } break; } case 5: { if(nog) { double *pAG = REAL(xxAG); for(int j = 0; j != col; ++j) { int s = j * row, e = s + row; double AGj = 1 / pAG[j]; for(int i = s; i != e; ++i) pout[i] = px[i] * AGj; } } else { for(int j = 0; j != col; ++j) { int s = j * row; double *AG = REAL(xxAG) + j * ng - 1; for(int i = 0; i != row; ++i) pout[i + s] = px[i + s] * (1 / AG[pg[i]]); } } break; } case 6: { if(nog) { double *pAG = REAL(xxAG); for(int j = 0; j != col; ++j) { int s = j * row, e = s + row; double AGj = 100 / pAG[j]; for(int i = s; i != e; ++i) pout[i] = px[i] * AGj; } } else { for(int j = 0; j != col; ++j) { int s = j * row; double *AG = REAL(xxAG) + j * ng - 1; for(int i = 0; i != row; ++i) pout[i + s] = px[i + s] * (100 / AG[pg[i]]); } } break; } case 7: { if(nog) { double *pAG = REAL(xxAG); for(int j = 0; j != col; ++j) { int s = j * row, e = s + row; double AGj = pAG[j]; for(int i = s; i != e; ++i) pout[i] = px[i] + AGj; } } else { for(int j = 0; j != col; ++j) { int s = j * row; double *AG = REAL(xxAG) + j * ng - 1; for(int i = 0; i != row; ++i) pout[i + s] = px[i + s] + AG[pg[i]]; } } break; } case 8: { if(nog) { double *pAG = REAL(xxAG); for(int j = 0; j != col; ++j) { int s = j * row, e = s + row; double AGj = pAG[j]; for(int i = s; i != e; ++i) pout[i] = px[i] * AGj; } } else { for(int j = 0; j != col; ++j) { int s = j * row; double *AG = REAL(xxAG) + j * ng - 1; for(int i = 0; i != row; ++i) pout[i + s] = px[i + s] * AG[pg[i]]; } } break; } case 9: { if(nog) { double *pAG = REAL(xxAG); for(int j = 0; j != col; ++j) { int s = j * row, e = s + row; double AGj = pAG[j]; for(int i = s; i != e; ++i) pout[i] = mymod(px[i], AGj); } } else { for(int j = 0; j != col; ++j) { int s = j * row; double *AG = REAL(xxAG) + j * ng - 1; for(int i = 0; i != row; ++i) pout[i + s] = mymod(px[i + s], AG[pg[i]]); } } break; } case 10: { if(nog) { double *pAG = REAL(xxAG); for(int j = 0; j != col; ++j) { int s = j * row, e = s + row; double AGj = pAG[j]; for(int i = s; i != e; ++i) pout[i] = myremain(px[i], AGj); } } else { for(int j = 0; j != col; ++j) { int s = j * row; double *AG = REAL(xxAG) + j * ng - 1; for(int i = 0; i != row; ++i) pout[i + s] = myremain(px[i + s], AG[pg[i]]); } } break; } default: stop("Unknown Transformation"); } break; } case STRSXP: stop("The requested transformation is not possible with strings"); case LGLSXP: stop("The requested transformation is not possible with logical data"); default: stop("Not supported SEXP type!"); } SHALLOW_DUPLICATE_ATTRIB(out, x); UNPROTECT(1); return out; } } collapse/src/ffirst.c0000644000176200001440000003024414174223734014316 0ustar liggesusers#include "collapse_c.h" // #include // #include // TODO: Implemented smarter copy names ?! // About Pointers // https://www.tutorialspoint.com/cprogramming/c_pointers.htm // https://www.tutorialspoint.com/cprogramming/c_pointer_arithmetic.htm // Use const ? SEXP ffirst_impl(SEXP x, int ng, SEXP g, int narm, int *gl) { int l = length(x), tx = TYPEOF(x), end = l-1; if (l < 2) return x; // Prevents seqfault for numeric(0) #101 if (ng == 0) { SEXP out = PROTECT(allocVector(tx, 1)); int j = 0; if(narm) { switch(tx) { case REALSXP: { double *px = REAL(x); while(ISNAN(px[j]) && j != end) ++j; REAL(out)[0] = px[j]; break; } case STRSXP: { SEXP *px = STRING_PTR(x); while(px[j] == NA_STRING && j != end) ++j; SET_STRING_ELT(out, 0, px[j]); break; } case INTSXP: case LGLSXP: { int *px = INTEGER(x); while(px[j] == NA_INTEGER && j != end) ++j; INTEGER(out)[0] = px[j]; break; } case VECSXP: { SEXP *px = SEXPPTR(x); while(length(px[j]) == 0 && j != end) ++j; SET_VECTOR_ELT(out, 0, px[j]); break; } default: error("Unsupported SEXP type!"); } } else { switch(tx) { case REALSXP: REAL(out)[0] = REAL(x)[0]; break; case STRSXP: SET_STRING_ELT(out, 0, STRING_ELT(x, 0)); break; case INTSXP: case LGLSXP: INTEGER(out)[0] = INTEGER(x)[0]; break; case VECSXP: SET_VECTOR_ELT(out, 0, VECTOR_ELT(x, 0)); break; default: error("Unsupported SEXP type!"); } } copyMostAttrib(x, out); if(!isNull(getAttrib(x, R_NamesSymbol))) namesgets(out, ScalarString(STRING_ELT(getAttrib(x, R_NamesSymbol), j))); UNPROTECT(1); return out; } else { // with groups if(length(g) != l) error("length(g) must match nrow(X)"); SEXP out = PROTECT(allocVector(tx, ng)); if(narm) { int ngs = 0, *pg = INTEGER(g); switch(tx) { case REALSXP: { double *px = REAL(x), *pout = REAL(out); for(int i = ng; i--; ) pout[i] = NA_REAL; --pout; for(int i = 0; i != l; ++i) { if(NISNAN(px[i])) { // Fastest ??? if(ISNAN(pout[pg[i]])) { pout[pg[i]] = px[i]; if(++ngs == ng) break; } } } break; } case STRSXP: { SEXP *px = STRING_PTR(x), *pout = STRING_PTR(out); for(int i = ng; i--; ) pout[i] = NA_STRING; --pout; for(int i = 0; i != l; ++i) { if(px[i] != NA_STRING) { if(pout[pg[i]] == NA_STRING) { pout[pg[i]] = px[i]; if(++ngs == ng) break; } } } break; } case INTSXP: case LGLSXP: { int *px = INTEGER(x), *pout = INTEGER(out); for(int i = ng; i--; ) pout[i] = NA_INTEGER; --pout; for(int i = 0; i != l; ++i) { if(px[i] != NA_INTEGER) { if(pout[pg[i]] == NA_INTEGER) { pout[pg[i]] = px[i]; if(++ngs == ng) break; } } } break; } case VECSXP: { SEXP *px = SEXPPTR(x), *pout = SEXPPTR(out); for(int i = ng; i--; ) pout[i] = R_NilValue; // R_NilValue or just leave empty ?? --pout; for(int i = 0; i != l; ++i) { if(length(px[i])) { if(pout[pg[i]] == R_NilValue) { pout[pg[i]] = px[i]; if(++ngs == ng) break; } } } break; } default: error("Unsupported SEXP type!"); } } else { // Old Implementation: With boolean array // bool gl[ng+1]; // memset(gl, 1, sizeof(bool) * (ng+1)); // for(int i = 0; i != l; ++i) { // if(gl[pg[i]]) { // gl[pg[i]] = false; // pout[pg[i]] = px[i]; // ++ngs; // if(ngs == ng) break; // } // } switch(tx) { case REALSXP: { double *px = REAL(x), *pout = REAL(out); for(int i = ng; i--; ) pout[i] = px[gl[i]]; break; } case INTSXP: case LGLSXP: { int *px = INTEGER(x), *pout = INTEGER(out); for(int i = ng; i--; ) pout[i] = px[gl[i]]; break; } case STRSXP: case VECSXP: { SEXP *px = SEXPPTR(x), *pout = SEXPPTR(out); for(int i = ng; i--; ) pout[i] = px[gl[i]]; break; } default: error("Unsupported SEXP type!"); } } copyMostAttrib(x, out); // SHALLOW_DUPLICATE_ATTRIB(out, x); UNPROTECT(1); return out; } } SEXP ffirstC(SEXP x, SEXP Rng, SEXP g, SEXP Rnarm) { int *pgl, ng = asInteger(Rng), narm = asLogical(Rnarm); if(ng == 0 || narm) { pgl = &ng; // TO avoid Wmaybe uninitialized return ffirst_impl(x, ng, g, narm, pgl); } // Using C-Array -> Not a good idea, variable length arrays give note on gcc11 SEXP gl = PROTECT(allocVector(INTSXP, ng)); int *pg = INTEGER(g), lg = length(g); pgl = INTEGER(gl); for(int i = ng; i--; ) pgl[i] = NA_INTEGER; --pgl; // &gl[0]-1 Or gl-1; // Pointer to -1 array element (since g starts from 1): https://beginnersbook.com/2014/01/c-pointer-to-array-example/ // Above gives gcc11 issue !! (works with R INTEGER() pointer, not plain C array) for(int i = 0; i != lg; ++i) if(pgl[pg[i]] == NA_INTEGER) pgl[pg[i]] = i; // SEXP gl = PROTECT(allocVector(INTSXP, ng)); // memset(gl, 0, sizeof(int)*ng); // // int *pg = INTEGER(g); // pgl = INTEGER(gl)-1; // Pointer to -1 array element (since g starts from 1): https://beginnersbook.com/2014/01/c-pointer-to-array-example/ // for(int i = length(g); i--; ) if(!pgl[pg[i]]) pgl[pg[i]] = i; // Correct? even for first value ? // SEXP out = PROTECT(allocVector(INTSXP, ng)); // int *pout = INTEGER(out); // for(int i = ng; i--; ) pout[i] = pgl[i+1]; // UNPROTECT(1); // return out; // Checking pointer: appears to be correct... // UNPROTECT(1); // return gl; SEXP res = ffirst_impl(x, ng, g, narm, ++pgl); UNPROTECT(1); return res; } SEXP ffirstlC(SEXP x, SEXP Rng, SEXP g, SEXP Rnarm) { int l = length(x), *pgl, ng = asInteger(Rng), narm = asLogical(Rnarm), nprotect = 1; if(ng > 0 && !narm) { // Cant use integer array here because apparently it is removed by the garbage collector when passed to a new function SEXP gl = PROTECT(allocVector(INTSXP, ng)); ++nprotect; int *pg = INTEGER(g), lg = length(g); // gl[ng], pgl = INTEGER(gl); // pgl = &gl[0]; for(int i = ng; i--; ) pgl[i] = NA_INTEGER; --pgl; for(int i = 0; i != lg; ++i) if(pgl[pg[i]] == NA_INTEGER) pgl[pg[i]] = i; ++pgl; } else pgl = &l; // To avoid Wmaybe uninitialized.. // return ffirst_impl(VECTOR_ELT(x, 0), ng, g, narm, pgl); SEXP out = PROTECT(allocVector(VECSXP, l)); SEXP *px = SEXPPTR(x), *pout = SEXPPTR(out); for(int j = 0; j != l; ++j) pout[j] = ffirst_impl(px[j], ng, g, narm, pgl); DFcopyAttr(out, x, ng); UNPROTECT(nprotect); return out; } // For matrix writing a separate function to increase efficiency. SEXP ffirstmC(SEXP x, SEXP Rng, SEXP g, SEXP Rnarm, SEXP Rdrop) { SEXP dim = getAttrib(x, R_DimSymbol); if(isNull(dim)) error("x is not a matrix"); int tx = TYPEOF(x), ng = asInteger(Rng), narm = asLogical(Rnarm), l = INTEGER(dim)[0], col = INTEGER(dim)[1], end = l-1; if (l < 2) return x; if (ng == 0) { SEXP out = PROTECT(allocVector(tx, col)); if(narm) { switch(tx) { case REALSXP: { double *px = REAL(x), *pout = REAL(out); for(int j = 0, i = 0; j != col; ++j) { while(ISNAN(px[i]) && i != end) ++i; pout[j] = px[i]; px += l; i = 0; } break; } case STRSXP: { SEXP *px = STRING_PTR(x), *pout = STRING_PTR(out); for(int j = 0, i = 0; j != col; ++j) { while(px[i] == NA_STRING && i != end) ++i; pout[j] = px[i]; px += l; i = 0; } break; } case INTSXP: case LGLSXP: { int *px = INTEGER(x), *pout = INTEGER(out); for(int j = 0, i = 0; j != col; ++j) { while(px[i] == NA_INTEGER && i != end) ++i; pout[j] = px[i]; px += l; i = 0; } break; } case VECSXP: { SEXP *px = SEXPPTR(x), *pout = SEXPPTR(out); for(int j = 0, i = 0; j != col; ++j) { while(length(px[i]) == 0 && i != end) ++i; pout[j] = px[i]; px += l; i = 0; } break; } default: error("Unsupported SEXP type!"); } } else { switch(tx) { case REALSXP: { double *px = REAL(x), *pout = REAL(out); for(int j = 0; j != col; ++j) pout[j] = px[j * l]; break; } case INTSXP: case LGLSXP: { int *px = INTEGER(x), *pout = INTEGER(out); for(int j = 0; j != col; ++j) pout[j] = px[j * l]; break; } case STRSXP: case VECSXP: { SEXP *px = SEXPPTR(x), *pout = SEXPPTR(out); for(int j = 0; j != col; ++j) pout[j] = px[j * l]; break; } default: error("Unsupported SEXP type!"); } } matCopyAttr(out, x, Rdrop, ng); UNPROTECT(1); return out; } else { // with groups if(length(g) != l) error("length(g) must match nrow(X)"); SEXP out = PROTECT(allocVector(tx, ng * col)); int *pg = INTEGER(g); if(narm) { switch(tx) { case REALSXP: { double *px = REAL(x), *pout = REAL(out); for(int i = ng * col; i--; ) pout[i] = NA_REAL; --pout; for(int j = 0; j != col; ++j) { for(int i = 0; i != l; ++i) if(NISNAN(px[i]) && ISNAN(pout[pg[i]])) pout[pg[i]] = px[i]; px += l; pout += ng; } break; } case STRSXP: { SEXP *px = STRING_PTR(x), *pout = STRING_PTR(out); for(int i = ng * col; i--; ) pout[i] = NA_STRING; --pout; for(int j = 0; j != col; ++j) { for(int i = 0; i != l; ++i) if(px[i] != NA_STRING && pout[pg[i]] == NA_STRING) pout[pg[i]] = px[i]; px += l; pout += ng; } break; } case INTSXP: case LGLSXP: { int *px = INTEGER(x), *pout = INTEGER(out); for(int i = ng * col; i--; ) pout[i] = NA_INTEGER; --pout; for(int j = 0; j != col; ++j) { for(int i = 0; i != l; ++i) if(px[i] != NA_INTEGER && pout[pg[i]] == NA_INTEGER) pout[pg[i]] = px[i]; px += l; pout += ng; } break; } case VECSXP: { SEXP *px = SEXPPTR(x), *pout = SEXPPTR(out); for(int i = ng * col; i--; ) pout[i] = R_NilValue; --pout; for(int j = 0; j != col; ++j) { for(int i = 0; i != l; ++i) if(length(px[i]) && pout[pg[i]] == R_NilValue) pout[pg[i]] = px[i]; px += l; pout += ng; } break; } default: error("Unsupported SEXP type!"); } } else { SEXP gl = PROTECT(allocVector(INTSXP, ng)); // int gl[ng], *pgl; pgl = &gl[0]; int *pgl = INTEGER(gl); for(int i = ng; i--; ) pgl[i] = NA_INTEGER; --pgl; // gcc11 issue with plain array for(int i = 0; i != l; ++i) if(pgl[pg[i]] == NA_INTEGER) pgl[pg[i]] = i; ++pgl; switch(tx) { case REALSXP: { double *px = REAL(x), *pout = REAL(out); for(int j = 0; j != col; ++j) { for(int i = ng; i--; ) pout[i] = px[pgl[i]]; px += l; pout += ng; } break; } case INTSXP: case LGLSXP: { int *px = INTEGER(x), *pout = INTEGER(out); for(int j = 0; j != col; ++j) { for(int i = ng; i--; ) pout[i] = px[pgl[i]]; px += l; pout += ng; } break; } case STRSXP: case VECSXP: { SEXP *px = SEXPPTR(x), *pout = SEXPPTR(out); for(int j = 0; j != col; ++j) { for(int i = ng; i--; ) pout[i] = px[pgl[i]]; px += l; pout += ng; } break; } default: error("Unsupported SEXP type!"); } UNPROTECT(1); } matCopyAttr(out, x, Rdrop, ng); UNPROTECT(1); return out; } } collapse/src/pwnobs.cpp0000644000176200001440000002613314056063245014671 0ustar liggesusers// [[Rcpp::plugins(cpp11)]] #include using namespace Rcpp; template IntegerMatrix pwnobsmCppImpl(const Matrix& x) { int l = x.nrow(), col = x.ncol(); auto isnnanT = (RTYPE == REALSXP) ? [](typename Rcpp::traits::storage_type::type x) { return x == x; } : [](typename Rcpp::traits::storage_type::type x) { return x != Vector::get_na(); }; IntegerMatrix out = no_init_matrix(col, col); for(int j = 0; j != col; ++j) { ConstMatrixColumn colj = x( _ , j); int nj = std::count_if(colj.begin(), colj.end(), isnnanT); out(j, j) = nj; for(int k = j+1; k != col; ++k) { ConstMatrixColumn colk = x( _ , k); int njk = 0; for(int i = l; i--; ) if(isnnanT(colj[i]) && isnnanT(colk[i])) ++njk; // fastest? or save logical vector with colj Non-missing? out(j, k) = out(k, j) = njk; } } Rf_dimnamesgets(out, List::create(colnames(x), colnames(x))); return out; } template <> IntegerMatrix pwnobsmCppImpl(const Matrix& x) { stop("Not supported SEXP type!"); } template <> IntegerMatrix pwnobsmCppImpl(const Matrix& x) { stop("Not supported SEXP type!"); } template <> IntegerMatrix pwnobsmCppImpl(const Matrix& x) { stop("Not supported SEXP type!"); } template <> IntegerMatrix pwnobsmCppImpl(const Matrix& x) { stop("Not supported SEXP type!"); } // [[Rcpp::export]] IntegerMatrix pwnobsmCpp(SEXP x){ RCPP_RETURN_MATRIX(pwnobsmCppImpl, x); } // Old / Experimental: // // inline bool nisnan(double x) { // return x == x; // } // // Not fast !!! : // // [[Rcpp::export]] // IntegerMatrix pwnobslCpp(const List& x) { // int l = x.size(); // IntegerMatrix out = no_init_matrix(l, l); // for(int j = 0; j != l; ++j) { // switch(TYPEOF(x[j])) { // case REALSXP: { // NumericVector colj = x[j]; // int nj = std::count_if(colj.begin(), colj.end(), nisnan); // int rowj = colj.size(); // out(j, j) = nj; // for(int k = j+1; k != l; ++k) { // switch(TYPEOF(x[k])) { // case REALSXP: { // NumericVector colk = x[k]; // if(colk.size() != rowj) stop("All columns need to have the same length!"); // int njk = 0; // for(int i = rowj; i--; ) if(nisnan(colj[i]) && nisnan(colk[i])) ++njk; // fastest? or save logical vector with colj Non-missing? // out(j, k) = out(k, j) = njk; // break; // } // case INTSXP: { // IntegerVector colk = x[k]; // if(colk.size() != rowj) stop("All columns need to have the same length!"); // int njk = 0; // for(int i = rowj; i--; ) if(nisnan(colj[i]) && colk[i] != NA_INTEGER) ++njk; // fastest? or save logical vector with colj Non-missing? // out(j, k) = out(k, j) = njk; // break; // } // case STRSXP: { // CharacterVector colk = x[k]; // if(colk.size() != rowj) stop("All columns need to have the same length!"); // int njk = 0; // for(int i = rowj; i--; ) if(nisnan(colj[i]) && colk[i] != NA_STRING) ++njk; // fastest? or save logical vector with colj Non-missing? // out(j, k) = out(k, j) = njk; // break; // } // case LGLSXP: { // CharacterVector colk = x[k]; // if(colk.size() != rowj) stop("All columns need to have the same length!"); // int njk = 0; // for(int i = rowj; i--; ) if(nisnan(colj[i]) && colk[i] != NA_LOGICAL) ++njk; // fastest? or save logical vector with colj Non-missing? // out(j, k) = out(k, j) = njk; // break; // } // default: stop("incompatible SEXP encountered;"); // } // } // break; // } // case INTSXP: { // IntegerVector colj = x[j]; // int rowj = colj.size(); // int nj = rowj - std::count(colj.begin(), colj.end(), NA_INTEGER); // out(j, j) = nj; // for(int k = j+1; k != l; ++k) { // switch(TYPEOF(x[k])) { // case REALSXP: { // NumericVector colk = x[k]; // if(colk.size() != rowj) stop("All columns need to have the same length!"); // int njk = 0; // for(int i = rowj; i--; ) if(colj[i] != NA_INTEGER && nisnan(colk[i])) ++njk; // fastest? or save logical vector with colj Non-missing? // out(j, k) = out(k, j) = njk; // break; // } // case INTSXP: { // IntegerVector colk = x[k]; // if(colk.size() != rowj) stop("All columns need to have the same length!"); // int njk = 0; // for(int i = rowj; i--; ) if(colj[i] != NA_INTEGER && colk[i] != NA_INTEGER) ++njk; // fastest? or save logical vector with colj Non-missing? // out(j, k) = out(k, j) = njk; // break; // } // case STRSXP: { // CharacterVector colk = x[k]; // if(colk.size() != rowj) stop("All columns need to have the same length!"); // int njk = 0; // for(int i = rowj; i--; ) if(colj[i] != NA_INTEGER && colk[i] != NA_STRING) ++njk; // fastest? or save logical vector with colj Non-missing? // out(j, k) = out(k, j) = njk; // break; // } // case LGLSXP: { // CharacterVector colk = x[k]; // if(colk.size() != rowj) stop("All columns need to have the same length!"); // int njk = 0; // for(int i = rowj; i--; ) if(colj[i] != NA_INTEGER && colk[i] != NA_LOGICAL) ++njk; // fastest? or save logical vector with colj Non-missing? // out(j, k) = out(k, j) = njk; // break; // } // default: stop("incompatible SEXP encountered;"); // } // } // break; // } // case STRSXP: { // CharacterVector colj = x[j]; // int rowj = colj.size(); // int nj = rowj - std::count(colj.begin(), colj.end(), NA_STRING); // out(j, j) = nj; // for(int k = j+1; k != l; ++k) { // switch(TYPEOF(x[k])) { // case REALSXP: { // NumericVector colk = x[k]; // if(colk.size() != rowj) stop("All columns need to have the same length!"); // int njk = 0; // for(int i = rowj; i--; ) if(colj[i] != NA_STRING && nisnan(colk[i])) ++njk; // fastest? or save logical vector with colj Non-missing? // out(j, k) = out(k, j) = njk; // break; // } // case INTSXP: { // IntegerVector colk = x[k]; // if(colk.size() != rowj) stop("All columns need to have the same length!"); // int njk = 0; // for(int i = rowj; i--; ) if(colj[i] != NA_STRING && colk[i] != NA_INTEGER) ++njk; // fastest? or save logical vector with colj Non-missing? // out(j, k) = out(k, j) = njk; // break; // } // case STRSXP: { // CharacterVector colk = x[k]; // if(colk.size() != rowj) stop("All columns need to have the same length!"); // int njk = 0; // for(int i = rowj; i--; ) if(colj[i] != NA_STRING && colk[i] != NA_STRING) ++njk; // fastest? or save logical vector with colj Non-missing? // out(j, k) = out(k, j) = njk; // break; // } // case LGLSXP: { // CharacterVector colk = x[k]; // if(colk.size() != rowj) stop("All columns need to have the same length!"); // int njk = 0; // for(int i = rowj; i--; ) if(colj[i] != NA_STRING && colk[i] != NA_LOGICAL) ++njk; // fastest? or save logical vector with colj Non-missing? // out(j, k) = out(k, j) = njk; // break; // } // default: stop("incompatible SEXP encountered;"); // } // } // break; // } // case LGLSXP: { // LogicalVector colj = x[j]; // int rowj = colj.size(); // int nj = rowj - std::count(colj.begin(), colj.end(), NA_LOGICAL); // out(j, j) = nj; // for(int k = j+1; k != l; ++k) { // switch(TYPEOF(x[k])) { // case REALSXP: { // NumericVector colk = x[k]; // if(colk.size() != rowj) stop("All columns need to have the same length!"); // int njk = 0; // for(int i = rowj; i--; ) if(colj[i] != NA_LOGICAL && nisnan(colk[i])) ++njk; // fastest? or save logical vector with colj Non-missing? // out(j, k) = out(k, j) = njk; // break; // } // case INTSXP: { // IntegerVector colk = x[k]; // if(colk.size() != rowj) stop("All columns need to have the same length!"); // int njk = 0; // for(int i = rowj; i--; ) if(colj[i] != NA_LOGICAL && colk[i] != NA_INTEGER) ++njk; // fastest? or save logical vector with colj Non-missing? // out(j, k) = out(k, j) = njk; // break; // } // case STRSXP: { // CharacterVector colk = x[k]; // if(colk.size() != rowj) stop("All columns need to have the same length!"); // int njk = 0; // for(int i = rowj; i--; ) if(colj[i] != NA_LOGICAL && colk[i] != NA_STRING) ++njk; // fastest? or save logical vector with colj Non-missing? // out(j, k) = out(k, j) = njk; // break; // } // case LGLSXP: { // CharacterVector colk = x[k]; // if(colk.size() != rowj) stop("All columns need to have the same length!"); // int njk = 0; // for(int i = rowj; i--; ) if(colj[i] != NA_LOGICAL && colk[i] != NA_LOGICAL) ++njk; // fastest? or save logical vector with colj Non-missing? // out(j, k) = out(k, j) = njk; // break; // } // default: stop("incompatible SEXP encountered;"); // } // } // break; // } // default: // stop("incompatible SEXP encountered;"); // } // } // out.attr("dimnames") = List::create(x.attr("names"), x.attr("names")); // return out; // } // // // [[Rcpp::export]] // IntegerMatrix pwnobslCpp(const List& x) { // int l = x.size(); // IntegerMatrix out = no_init_matrix(l, l); // for(int j = 0; j != l; ++j) { // int RTYPEj = TYPEOF(x[j]); // auto isnnanTj = (RTYPEj == REALSXP) ? [](typename Rcpp::traits::storage_type::type x) { return x == x; } : // [](typename Rcpp::traits::storage_type::type x) { return x != Vector::get_na(); }; // Vector colj = x[j]; // int nj = std::count_if(colj.begin(), colj.end(), isnnanTj); // int rowj = colj.size(); // out(j, j) = nj; // for(int k = j+1; k != col; ++k) { // int RTYPEk = TYPEOF(x[k]); // auto isnnanTk = (RTYPEk == REALSXP) ? [](typename Rcpp::traits::storage_type::type x) { return x == x; } : // [](typename Rcpp::traits::storage_type::type x) { return x != Vector::get_na(); }; // Vector colk = x[k]; // if(colk.size() != rowj) stop("All columns need to have the same length!"); // int njk = 0; // for(int i = rowj; i--; ) if(isnnanTj(colj[i]) && isnnanTk(colk[i])) ++njk; // fastest? or save logical vector with colj Non-missing? // out(j, k) = out(k, j) = njk; // } // } // out.attr("dimnames") = List::create(names(x), names(x)); // return out; // } collapse/src/fNdistinct.cpp0000644000176200001440000005033214174223734015466 0ustar liggesusers// [[Rcpp::plugins(cpp11)]] #include using namespace Rcpp; // TODO: Perhaps redo everything with data pointers and 2d group indices (instead of filling the 2d structure every time !): http://www.cplusplus.com/reference/vector/vector/data/ // https://stackoverflow.com/questions/1733143/converting-between-c-stdvector-and-c-array-without-copying?rq=1 template IntegerVector fndistinctImpl(const Vector& x, int ng, const IntegerVector& g, const SEXP& gs, bool narm) { int l = x.size(); typedef typename Rcpp::traits::storage_type::type storage_t; auto isnanT = (RTYPE == REALSXP) ? [](storage_t x) { return x != x; } : [](storage_t x) { return x == Vector::get_na(); }; unsigned int addr; if(ng == 0) { sugar::IndexHash hash(x); if(narm) { // unsigned int addr; // int *data = hash.data; // int &m = hash.m; // int &size_ = hash.size_; // auto get_addr = hash.get_addr; // sugar::IndexHash::get_addr; // auto not_equal = hash.not_equal; // sugar::IndexHash::not_equal; // IndexHash *hashptr = new hash; // auto get_addr2 = (hash.*get_addr); for(int i = 0; i != l; ++i) { storage_t val = hash.src[i]; if(isnanT(val)) continue; addr = hash.get_addr(val); while(hash.data[addr] && hash.not_equal(hash.src[hash.data[addr] - 1], val)) { ++addr; if(addr == static_cast(hash.m)) addr = 0; } if(!hash.data[addr]) { hash.data[addr] = i+1; ++hash.size_; } } } else { hash.fill(); } return Rf_ScalarInteger(hash.size_); } else { // unsigned int addr; if(l != g.size()) stop("length(g) must match length(x)"); std::vector > gmap(ng+1); IntegerVector out(ng); int *outm1 = out.begin()-1; if(Rf_isNull(gs)) { for(int i = 0; i != l; ++i) ++outm1[g[i]]; for(int i = 0; i != ng; ++i) { if(out[i] == 0) stop("Group size of 0 encountered. This is probably due to unused factor levels. Use fdroplevels(f) to drop them."); gmap[i+1] = std::vector (out[i]); out[i] = 0; } // memset(out, 0, sizeof(int)*ng); // Stable ? -> Nope, gives error } else { IntegerVector gsv = gs; if(ng != gsv.size()) stop("ng must match length(gs)"); for(int i = 0; i != ng; ++i) { if(gsv[i] == 0) stop("Group size of 0 encountered. This is probably due to unused factor levels. Use fdroplevels(f) to drop them."); gmap[i+1] = std::vector (gsv[i]); } } for(int i = 0; i != l; ++i) gmap[g[i]][outm1[g[i]]++] = x[i]; if(narm) { for(int gr = 0; gr != ng; ++gr) { // const std::vector& temp = gmap[gr+1]; // good ? // const Vector& // wrap() sugar::IndexHash hash(wrap(gmap[gr+1])); // temp for(int i = hash.n; i--; ) { storage_t val = hash.src[i]; if(isnanT(val)) continue; addr = hash.get_addr(val); while(hash.data[addr] && hash.not_equal(hash.src[hash.data[addr] - 1], val)) { ++addr; if(addr == static_cast(hash.m)) addr = 0; } if(!hash.data[addr]) { hash.data[addr] = i+1; ++hash.size_; } } out[gr] = hash.size_; } } else { for(int gr = 0; gr != ng; ++gr) { sugar::IndexHash hash(wrap(gmap[gr+1])); hash.fill(); out[gr] = hash.size_; } } if(!Rf_isObject(x)) { Rf_copyMostAttrib(x, out); // SHALLOW_DUPLICATE_ATTRIB(out, x); } else { SEXP sym_label = Rf_install("label"); Rf_setAttrib(out, sym_label, Rf_getAttrib(x, sym_label)); } return out; } } IntegerVector fndistinctFACT(const IntegerVector& x, int ng, const IntegerVector& g, const SEXP& gs, bool narm) { int l = x.size(), nlevp = Rf_nlevels(x)+1, n = 1; if(ng == 0) { std::vector uxp(nlevp, true); if(narm) { for(int i = 0; i != l; ++i) { if(x[i] != NA_INTEGER && uxp[x[i]]) { // save xi = x[i] ? Faster ? uxp[x[i]] = false; if(++n == nlevp) break; } } } else { bool anyNA = false; for(int i = 0; i != l; ++i) { if(x[i] == NA_INTEGER) { anyNA = true; continue; } if(uxp[x[i]]) { // save xi = x[i] ? Faster ? uxp[x[i]] = false; if(++n == nlevp && anyNA) break; } } n += anyNA; } return Rf_ScalarInteger(n-1); } else { // unsigned int addr; if(l != g.size()) stop("length(g) must match length(x)"); std::vector > gmap(ng+1); IntegerVector out(ng); int *outm1 = out.begin()-1; if(Rf_isNull(gs)) { for(int i = 0; i != l; ++i) ++outm1[g[i]]; for(int i = 0; i != ng; ++i) { if(out[i] == 0) stop("Group size of 0 encountered. This is probably due to unused factor levels. Use fdroplevels(f) to drop them."); gmap[i+1] = std::vector (out[i]); out[i] = 0; } } else { IntegerVector gsv = gs; if(ng != gsv.size()) stop("ng must match length(gs)"); for(int i = 0; i != ng; ++i) { if(gsv[i] == 0) stop("Group size of 0 encountered. This is probably due to unused factor levels. Use fdroplevels(f) to drop them."); gmap[i+1] = std::vector (gsv[i]); } } for(int i = 0; i != l; ++i) gmap[g[i]][outm1[g[i]]++] = x[i]; if(narm) { for(int gr = 0; gr != ng; ++gr) { const std::vector& temp = gmap[gr+1]; n = 1; std::vector uxp(nlevp, true); for(int i = temp.size(); i--; ) { if(temp[i] != NA_INTEGER && uxp[temp[i]]) { // save xi = x[i] ? Faster ? uxp[temp[i]] = false; if(++n == nlevp) break; } } out[gr] = n - 1; } } else { for(int gr = 0; gr != ng; ++gr) { const std::vector& temp = gmap[gr+1]; bool anyNA = false; n = 1; std::vector uxp(nlevp, true); for(int i = temp.size(); i--; ) { if(temp[i] == NA_INTEGER) { anyNA = true; continue; } if(uxp[temp[i]]) { // save xi = x[i] ? Faster ? uxp[temp[i]] = false; if(++n == nlevp && anyNA) break; } } out[gr] = n + anyNA - 1; } } SEXP sym_label = Rf_install("label"); Rf_setAttrib(out, sym_label, Rf_getAttrib(x, sym_label)); return out; } } // No logical vector with sugar::IndexHash ! IntegerVector fndistinctLOGI(const LogicalVector& x, int ng, const IntegerVector& g, const SEXP& gs, bool narm) { int l = x.size(); if(ng == 0) { int Nunique = 0; if(narm) { bool which = true; for(int i = 0; i != l; ++i) { if(x[i] == NA_LOGICAL) continue; if(x[i] == which) { Nunique = 1; } else { which = x[i]; ++Nunique; if(Nunique == 2) break; } } } else { bool seen1 = true, seen2 = true, seen3 = true; for(int i = 0; i != l; ++i) { // better way? if(seen1 && x[i] == NA_LOGICAL) { ++Nunique; seen1 = false; } else if(seen2 && x[i] == true) { ++Nunique; seen2 = false; } else if(seen3 && x[i] == false) { ++Nunique; seen3 = false; } if(Nunique == 3) break; } } return Rf_ScalarInteger(Nunique); } else { if(l != g.size()) stop("length(g) must match length(x)"); IntegerVector out(ng); if(narm) { LogicalVector which(ng); int ngs = 0, gi; for(int i = 0; i != l; ++i) { gi = g[i]-1; if(x[i] == NA_LOGICAL || which[gi] == NA_LOGICAL) continue; if(x[i] == which[gi]) { out[gi] = 1; } else { which[gi] = x[i]; ++out[gi]; if(out[gi] == 2) { which[gi] = NA_LOGICAL; ++ngs; if(ngs == ng) break; } } } } else { LogicalVector seen1(ng, true), seen2(ng, true), seen3(ng, true); for(int i = 0; i != l; ++i) { // better way? int gi = g[i]-1; if(seen1[gi] && x[i] == NA_LOGICAL) { ++out[gi]; seen1[gi] = false; } else if(seen2[gi] && x[i] == true) { ++out[gi]; seen2[gi] = false; } else if(seen3[gi] && x[i] == false) { ++out[gi]; seen3[gi] = false; } } } if(!Rf_isObject(x)) Rf_copyMostAttrib(x, out); // SHALLOW_DUPLICATE_ATTRIB(out, x); return out; } } // [[Rcpp::export]] SEXP fndistinctCpp(const SEXP& x, int ng = 0, const IntegerVector& g = 0, const SEXP& gs = R_NilValue, bool narm = true) { switch(TYPEOF(x)) { case REALSXP: return fndistinctImpl(x, ng, g, gs, narm); case INTSXP: if(Rf_isFactor(x) && (ng == 0 || Rf_nlevels(x) < Rf_length(x) / ng * 3)) return fndistinctFACT(x, ng, g, gs, narm); return fndistinctImpl(x, ng, g, gs, narm); case STRSXP: return fndistinctImpl(x, ng, g, gs, narm); case LGLSXP: return fndistinctLOGI(x, ng, g, gs, narm); default: stop("Not supported SEXP type !"); } return R_NilValue; } // [[Rcpp::export]] SEXP fndistinctlCpp(const List& x, int ng = 0, const IntegerVector& g = 0, const SEXP& gs = R_NilValue, bool narm = true, bool drop = true) { int l = x.size(); List out(l); for(int j = l; j--; ) { switch(TYPEOF(x[j])) { case REALSXP: out[j] = fndistinctImpl(x[j], ng, g, gs, narm); break; case INTSXP: if(Rf_isFactor(x[j]) && (ng == 0 || Rf_nlevels(x[j]) < Rf_length(x[j]) / ng * 3)) out[j] = fndistinctFACT(x[j], ng, g, gs, narm); else out[j] = fndistinctImpl(x[j], ng, g, gs, narm); break; case STRSXP: out[j] = fndistinctImpl(x[j], ng, g, gs, narm); break; case LGLSXP: out[j] = fndistinctLOGI(x[j], ng, g, gs, narm); break; default: stop("Not supported SEXP type !"); } } if(drop && ng == 0) { IntegerVector res = no_init_vector(l); for(int i = l; i--; ) res[i] = out[i]; // Rf_coerceVector(out, INTSXP); // doesn't work Rf_setAttrib(res, R_NamesSymbol, Rf_getAttrib(x, R_NamesSymbol)); return res; } else { SHALLOW_DUPLICATE_ATTRIB(out, x); if(ng == 0) Rf_setAttrib(out, R_RowNamesSymbol, Rf_ScalarInteger(1)); else Rf_setAttrib(out, R_RowNamesSymbol, IntegerVector::create(NA_INTEGER, -ng)); return out; } } template SEXP fndistinctmImpl(const Matrix& x, int ng, const IntegerVector& g, const SEXP& gs, bool narm, bool drop) { int l = x.nrow(), col = x.ncol(); typedef typename Rcpp::traits::storage_type::type storage_t; auto isnanT = (RTYPE == REALSXP) ? [](storage_t x) { return x != x; } : [](storage_t x) { return x == Vector::get_na(); }; unsigned int addr; if(ng == 0) { IntegerVector out = no_init_vector(col); if(narm) { for(int j = col; j--; ) { // ConstMatrixColumn column = x(_ , j); sugar::IndexHash hash(wrap(x(_ , j))); // wrap(column) // why wrap needed ? for(int i = 0; i != l; ++i) { storage_t val = hash.src[i]; if(isnanT(val)) continue; addr = hash.get_addr(val); while(hash.data[addr] && hash.not_equal(hash.src[hash.data[addr] - 1], val)) { ++addr; if(addr == static_cast(hash.m)) addr = 0; } if(!hash.data[addr]) { hash.data[addr] = i+1; ++hash.size_; } } out[j] = hash.size_; } } else { for(int j = col; j--; ) { // ConstMatrixColumn column = x(_ , j); sugar::IndexHash hash(wrap(x(_ , j))); // wrap(column) // why wrap needed ? hash.fill(); out[j] = hash.size_; } } if(drop) Rf_setAttrib(out, R_NamesSymbol, colnames(x)); else { Rf_dimgets(out, Dimension(1, col)); colnames(out) = colnames(x); if(!Rf_isObject(x)) Rf_copyMostAttrib(x, out); } return out; } else { if(l != g.size()) stop("length(g) must match length(x)"); int ngp = ng+1; std::vector > gmap(ngp); IntegerMatrix out = no_init_matrix(ng, col); std::vector n(ngp); if(Rf_isNull(gs)) { // memset(n, 0, sizeof(int)*ng); for(int i = 0; i != l; ++i) ++n[g[i]]; for(int i = 1; i != ngp; ++i) { if(n[i] == 0) stop("Group size of 0 encountered. This is probably because of unused factor levels. Use fdroplevels(f) to drop them."); gmap[i] = std::vector (n[i]); } } else { IntegerVector gsv = gs; if(ng != gsv.size()) stop("ng must match length(gs)"); for(int i = 0; i != ng; ++i) { if(gsv[i] == 0) stop("Group size of 0 encountered. This is probably because of unused factor levels. Use fdroplevels(f) to drop them."); gmap[i+1] = std::vector (gsv[i]); } } if(narm) { for(int j = col; j--; ) { ConstMatrixColumn column = x(_ , j); IntegerMatrix::Column outj = out(_, j); n.assign(ngp, 0); for(int i = 0; i != l; ++i) gmap[g[i]][n[g[i]]++] = column[i]; // reading in all the values. Better way ? for(int gr = 0; gr != ng; ++gr) { // const std::vector& temp = gmap[gr+1]; // good ? // const Vector& // wrap() sugar::IndexHash hash(wrap(gmap[gr+1])); // wrap(temp) for(int i = hash.n; i--; ) { storage_t val = hash.src[i]; if(isnanT(val)) continue; addr = hash.get_addr(val); while(hash.data[addr] && hash.not_equal(hash.src[hash.data[addr] - 1], val)) { ++addr; if(addr == static_cast(hash.m)) addr = 0; } if(!hash.data[addr]) { hash.data[addr] = i+1; ++hash.size_; } } outj[gr] = hash.size_; } } } else { for(int j = col; j--; ) { ConstMatrixColumn column = x(_ , j); IntegerMatrix::Column outj = out(_, j); n.assign(ngp, 0); for(int i = 0; i != l; ++i) gmap[g[i]][n[g[i]]++] = column[i]; // reading in all the values. Better way ? for(int gr = 0; gr != ng; ++gr) { sugar::IndexHash hash(wrap(gmap[gr+1])); hash.fill(); outj[gr] = hash.size_; } } } colnames(out) = colnames(x); if(!Rf_isObject(x)) Rf_copyMostAttrib(x, out); return out; } } template <> // No logical vector with sugar::IndexHash ! SEXP fndistinctmImpl(const Matrix& x, int ng, const IntegerVector& g, const SEXP& gs, bool narm, bool drop) { int l = x.nrow(), col = x.ncol(); if(ng == 0) { IntegerVector out(col); if(narm) { for(int j = col; j--; ) { LogicalMatrix::ConstColumn column = x(_ , j); bool which = true; for(int i = 0; i != l; ++i) { if(column[i] == NA_LOGICAL) continue; if(column[i] == which) { out[j] = 1; } else { which = column[i]; ++out[j]; if(out[j] == 2) break; } } } } else { for(int j = col; j--; ) { LogicalMatrix::ConstColumn column = x(_ , j); bool seen1 = true, seen2 = true, seen3 = true; for(int i = 0; i != l; ++i) { // better way? if(seen1 && column[i] == NA_LOGICAL) { ++out[j]; seen1 = false; } else if(seen2 && column[i] == true) { ++out[j]; seen2 = false; } else if(seen3 && column[i] == false) { ++out[j]; seen3 = false; } if(out[j] == 3) break; } } } if(drop) Rf_setAttrib(out, R_NamesSymbol, colnames(x)); else { Rf_dimgets(out, Dimension(1, col)); colnames(out) = colnames(x); if(!Rf_isObject(x)) Rf_copyMostAttrib(x, out); } return out; } else { if(l != g.size()) stop("length(g) must match length(x)"); IntegerMatrix out(ng, col); // = no_init_matrix if(narm) { for(int j = col; j--; ) { LogicalMatrix::ConstColumn column = x(_ , j); IntegerMatrix::Column outj = out(_, j); LogicalVector which(ng); int ngs = 0, gi; for(int i = 0; i != l; ++i) { gi = g[i]-1; if(column[i] == NA_LOGICAL || which[gi] == NA_LOGICAL) continue; if(column[i] == which[gi]) { outj[gi] = 1; } else { which[gi] = column[i]; ++outj[gi]; if(outj[gi] == 2) { which[gi] = NA_LOGICAL; ++ngs; if(ngs == ng) break; } } } } } else { for(int j = col; j--; ) { LogicalMatrix::ConstColumn column = x(_ , j); IntegerMatrix::Column outj = out(_, j); LogicalVector seen1(ng, true), seen2(ng, true), seen3(ng, true); for(int i = 0; i != l; ++i) { // better way? int gi = g[i]-1; if(seen1[gi] && column[i] == NA_LOGICAL) { ++outj[gi]; seen1[gi] = false; } else if(seen2[gi] && column[i] == true) { ++outj[gi]; seen2[gi] = false; } else if(seen3[gi] && column[i] == false) { ++outj[gi]; seen3[gi] = false; } } } } colnames(out) = colnames(x); if(!Rf_isObject(x)) Rf_copyMostAttrib(x, out); return out; } } template <> SEXP fndistinctmImpl(const Matrix& x, int ng, const IntegerVector& g, const SEXP& gs, bool narm, bool drop) { stop("Not supported SEXP type!"); } template <> SEXP fndistinctmImpl(const Matrix& x, int ng, const IntegerVector& g, const SEXP& gs, bool narm, bool drop) { stop("Not supported SEXP type!"); } template <> SEXP fndistinctmImpl(const Matrix& x, int ng, const IntegerVector& g, const SEXP& gs, bool narm, bool drop) { stop("Not supported SEXP type!"); } template <> SEXP fndistinctmImpl(const Matrix& x, int ng, const IntegerVector& g, const SEXP& gs, bool narm, bool drop) { stop("Not supported SEXP type!"); } // [[Rcpp::export]] SEXP fndistinctmCpp(SEXP x, int ng = 0, IntegerVector g = 0, SEXP gs = R_NilValue, bool narm = true, bool drop = true) { RCPP_RETURN_MATRIX(fndistinctmImpl, x, ng, g, gs, narm, drop); } // This brings down code size from 180 kb to 168 kb, but about 25% slower than above.. // template // SEXP fndistinctmImpl(const Matrix& x, int ng, const IntegerVector& g, const SEXP& gs, bool narm, bool drop) { // int col = x.ncol(); // Matrix out = (ng == 0) ? no_init_matrix(1, col) : no_init_matrix(ng, col); // for(int j = col; j--; ) out(_, j) = fndistinctImpl(x(_, j), ng, g, gs, narm); // if(drop) { // Rf_setAttrib(out, R_DimSymbol, R_NilValue); // Rf_setAttrib(out, R_NamesSymbol, colnames(x)); // } else { // colnames(out) = colnames(x); // } // return out; // } // // // template <> // SEXP fndistinctmImpl(const Matrix& x, int ng, const IntegerVector& g, const SEXP& gs, bool narm, bool drop) { // stop("Not supported SEXP type!"); // } // // template <> // SEXP fndistinctmImpl(const Matrix& x, int ng, const IntegerVector& g, const SEXP& gs, bool narm, bool drop) { // stop("Not supported SEXP type!"); // } // // template <> // SEXP fndistinctmImpl(const Matrix& x, int ng, const IntegerVector& g, const SEXP& gs, bool narm, bool drop) { // stop("Not supported SEXP type!"); // } // // template <> // SEXP fndistinctmImpl(const Matrix& x, int ng, const IntegerVector& g, const SEXP& gs, bool narm, bool drop) { // stop("Not supported SEXP type!"); // } // // // // [[Rcpp::export]] // SEXP fndistinctmCpp(SEXP x, int ng = 0, IntegerVector g = 0, SEXP gs = R_NilValue, bool narm = true, bool drop = true) { // RCPP_RETURN_MATRIX(fndistinctmImpl, x, ng, g, gs, narm, drop); // } collapse/src/qF_qG.cpp0000644000176200001440000002305014174223734014353 0ustar liggesusers// // [[Rcpp::plugins(cpp11)]] #include using namespace Rcpp; template IntegerVector qFCppImpl(const Vector& x, bool ordered, bool na_exclude, bool keep_attr, int ret) { Vector levs = (na_exclude) ? na_omit(sort_unique(x)) : sort_unique(x); IntegerVector out = (na_exclude || RTYPE != REALSXP) ? match(x, levs) : as(Rf_match(levs, x, NA_INTEGER)); if(ret == 1) { // returning a factor if(keep_attr) SHALLOW_DUPLICATE_ATTRIB(out, x); // works for all atomic objects ? if(RTYPE == STRSXP) { Rf_setAttrib(out, R_LevelsSymbol, levs); } else { Rf_setAttrib(out, R_LevelsSymbol, Rf_coerceVector(levs, STRSXP)); // What about date objects... } Rf_classgets(out, (ordered && !na_exclude) ? CharacterVector::create("ordered","factor","na.included") : ordered ? CharacterVector::create("ordered","factor") : (!na_exclude) ? CharacterVector::create("factor","na.included") : CharacterVector::create("factor")); } else { // returnin a qG out.attr("N.groups") = int(levs.size()); if(ret == 3) { Rf_copyMostAttrib(x, levs); out.attr("groups") = levs; } Rf_classgets(out, (ordered && !na_exclude) ? CharacterVector::create("ordered","qG","na.included") : ordered ? CharacterVector::create("ordered","qG") : (!na_exclude) ? CharacterVector::create("qG","na.included") : CharacterVector::create("qG")); } return out; } // [[Rcpp::export]] // do Cpp 11 solution using return macro ? SEXP qFCpp(SEXP x, bool ordered = true, bool na_exclude = true, bool keep_attr = true, int ret = 1) { switch(TYPEOF(x)) { case INTSXP: return qFCppImpl(x, ordered, na_exclude, keep_attr, ret); case REALSXP: return qFCppImpl(x, ordered, na_exclude, keep_attr, ret); case STRSXP: return qFCppImpl(x, ordered, na_exclude, keep_attr, ret); case LGLSXP: { // Note that this always sorts it LogicalVector xl = x; int l = xl.size(); LogicalVector nd(3); IntegerVector out = no_init_vector(l); if(na_exclude) { for(int i = 0; i != l; ++i) { if(xl[i] == NA_LOGICAL) { out[i] = NA_INTEGER; } else if(xl[i] == true) { out[i] = 2; nd[1] = true; } else { out[i] = 1; nd[0] = true; } } if(!nd[0]) for(int i = l; i--; ) if(out[i] == 2) out[i] = 1; // no FALSE // otherwise malformed factor.. only 2 level but not 1 level } else { for(int i = 0; i != l; ++i) { if(xl[i] == NA_LOGICAL) { out[i] = 3; nd[2] = true; } else if(xl[i] == true) { out[i] = 2; nd[1] = true; } else { out[i] = 1; nd[0] = true; } } if(!nd[0] || (nd[2] && !nd[1])) { if(!nd[0]) { // no FALSE if(nd[1]) { // has TRUE (and NA) out = out - 1; } else { // only has NA out = out - 2; } } else { // NA and no TRUE for(int i = l; i--; ) if(out[i] == 3) out[i] = 2; } } } if(ret == 1) { // return factor if(keep_attr) SHALLOW_DUPLICATE_ATTRIB(out, x); Rf_setAttrib(out, R_LevelsSymbol, CharacterVector::create("FALSE", "TRUE", NA_STRING)[nd]); Rf_classgets(out, (ordered && !na_exclude) ? CharacterVector::create("ordered","factor","na.included") : ordered ? CharacterVector::create("ordered","factor") : (!na_exclude) ? CharacterVector::create("factor","na.included") : CharacterVector::create("factor")); } else { out.attr("N.groups") = int(nd[0]+nd[1]+nd[2]); if(ret == 3) { LogicalVector groups = LogicalVector::create(false, true, NA_LOGICAL)[nd]; Rf_copyMostAttrib(x, groups); out.attr("groups") = groups; } Rf_classgets(out, (ordered && !na_exclude) ? CharacterVector::create("ordered","qG","na.included") : ordered ? CharacterVector::create("ordered","qG") : (!na_exclude) ? CharacterVector::create("qG","na.included") : CharacterVector::create("qG")); } return out; } default: stop("Not Supported SEXP Type"); } return R_NilValue; } // TODO: could still remove NA, and also for sort_unique template Vector uniqueord(const Vector& x) { sugar::IndexHash hash(x); hash.fill(); // int l = x.size(); // almost same speed as member fill. // for(int i = 0; i != l; ++i) { // unsigned int addr = hash.get_addr(hash.src[i]); // while(hash.data[addr] && hash.not_equal(hash.src[hash.data[addr] - 1], hash.src[i])) { // ++addr; // if(addr == static_cast(hash.m)) addr = 0; // } // if(!hash.data[addr]) { // hash.data[addr] = i+1; // ++hash.size_; // } // } int hs = hash.size_; IntegerVector ord = no_init_vector(hs); for(int i = 0, j = 0; j < hs; i++) if(hash.data[i]) ord[j++] = hash.data[i]-1; std::sort(ord.begin(), ord.end()); Vector res = no_init_vector(hs); for(int i = 0; i < hs; ++i) res[i] = hash.src[ord[i]]; // inline Vector keys() const{ // Vector res = no_init(size_) ; // for( int i=0, j=0; j Vector funiqueImpl(const Vector& x, bool sort) { if(sort) { Vector out = sort_unique(x); Rf_copyMostAttrib(x, out); return out; } else { Vector out = uniqueord(x); Rf_copyMostAttrib(x, out); return out; } } IntegerVector funiqueFACT(const IntegerVector& x, bool sort = true) { int nlevp = Rf_nlevels(x)+1, l = x.size(), k = 0; std::vector not_seen(nlevp, true); bool countNA = true; if(sort) { for(int i = 0; i != l; ++i) { if(x[i] == NA_INTEGER) { if(countNA) { ++k; countNA = false; } continue; } if(not_seen[x[i]]) { not_seen[x[i]] = false; if(++k == nlevp) break; } } IntegerVector out = no_init_vector(k); if(!countNA) out[k-1] = NA_INTEGER; k = 0; for(int i = 1; i != nlevp; ++i) if(!not_seen[i]) out[k++] = i; Rf_copyMostAttrib(x, out); return out; } else { IntegerVector uxp = no_init_vector(nlevp); for(int i = 0; i != l; ++i) { if(x[i] == NA_INTEGER) { if(countNA) { uxp[k++] = NA_INTEGER; countNA = false; } continue; } if(not_seen[x[i]]) { uxp[k++] = x[i]; if(k == nlevp) { Rf_copyMostAttrib(x, uxp); return uxp; } not_seen[x[i]] = false; } } IntegerVector out = no_init_vector(k); for(int i = 0; i != k; ++i) out[i] = uxp[i]; Rf_copyMostAttrib(x, out); return out; } } // [[Rcpp::export]] SEXP funiqueCpp(SEXP x, bool sort = true) { switch(TYPEOF(x)) { case INTSXP: if(Rf_isFactor(x)) return funiqueFACT(x, sort); return funiqueImpl(x, sort); case REALSXP: return funiqueImpl(x, sort); case STRSXP: return funiqueImpl(x, sort); case LGLSXP: { LogicalVector xl = x; int nc = 0, n0 = 0, n1 = 0, n2 = 0, l = xl.size(); for(int i = 0; i != l; ++i) { if(n2 == 0 && xl[i] == NA_LOGICAL) { n2 = ++nc; } else if(n1 == 0 && xl[i] == true) { n1 = ++nc; } else if(n0 == 0 && xl[i] == false) { n0 = ++nc; } if(nc == 3) break; } LogicalVector out = no_init_vector(nc); if(sort) { nc = 0; if(n0) out[nc++] = false; if(n1) out[nc++] = true; if(n2) out[nc] = NA_LOGICAL; } else { if(n0) out[n0-1] = false; if(n1) out[n1-1] = true; if(n2) out[n2-1] = NA_LOGICAL; } // LogicalVector::create(false, true, NA_LOGICAL)[nd]; Rf_copyMostAttrib(x, out); return out; } default: stop("Not Supported SEXP Type"); } return R_NilValue; } // [[Rcpp::export]] IntegerVector fdroplevelsCpp(const IntegerVector& x, bool check_NA = true) { int nlevp = Rf_nlevels(x)+1, l = x.size(), n = 1; std::vector uxp(nlevp, 1); // 1 is also true ! bool anyNA = false; if(check_NA) { for(int i = 0; i != l; ++i) { if(x[i] == NA_INTEGER) { anyNA = true; continue; } if(uxp[x[i]]) { uxp[x[i]] = 0; if(++n == nlevp) return x; } // uxp[x[i]] = 1; // Runs through, slower than above on DHS Uganda (660 factors) } } else { for(int i = 0; i != l; ++i) { if(uxp[x[i]]) { uxp[x[i]] = 0; if(++n == nlevp) return x; } // uxp[x[i]] = 1; // Runs through, slower than above on DHS Uganda (660 factors) } } // n = std::accumulate(uxp.begin()+1, uxp.end(), 0); // if(n == nlevp-1) return x; CharacterVector levs = Rf_getAttrib(x, R_LevelsSymbol); CharacterVector newlevs = no_init_vector(n-1); // n n = 0; for(int i = 1; i != nlevp; ++i) { if(!uxp[i]) { newlevs[n] = levs[i-1]; uxp[i] = ++n; } } IntegerVector out = no_init_vector(l); // fastest solution ! // IntegerVector out = anyNA ? IntegerVector(l, NA_INTEGER) : no_init_vector(l); // Not faster !! if(anyNA) { // for(int i = 0; i != l; ++i) if(x[i] != NA_INTEGER) out[i] = uxp[x[i]]; for(int i = 0; i != l; ++i) out[i] = (x[i] == NA_INTEGER) ? NA_INTEGER : uxp[x[i]]; } else { for(int i = 0; i != l; ++i) out[i] = uxp[x[i]]; } SHALLOW_DUPLICATE_ATTRIB(out, x); Rf_setAttrib(out, R_LevelsSymbol, newlevs); return out; } collapse/src/ExportSymbols.cpp0000644000176200001440000002367514174223734016225 0ustar liggesusers#include "collapse.h" #include using namespace Rcpp; // prefix with RcppExport ? -> Yes, necessary ! RcppExport void multi_yw(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); RcppExport SEXP collapse_init(SEXP); RcppExport SEXP dt_na(SEXP, SEXP); RcppExport SEXP allNAv(SEXP, SEXP); RcppExport SEXP Cradixsort(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); RcppExport SEXP frankds(SEXP, SEXP, SEXP, SEXP); RcppExport SEXP pacf1(SEXP, SEXP); RcppExport SEXP rbindlist(SEXP, SEXP, SEXP, SEXP); RcppExport SEXP setcolorder(SEXP, SEXP); RcppExport SEXP subsetDT(SEXP, SEXP, SEXP, SEXP); RcppExport SEXP subsetCols(SEXP, SEXP, SEXP); RcppExport SEXP subsetVector(SEXP, SEXP, SEXP); RcppExport SEXP Calloccol(SEXP dt); // , SEXP Rn RcppExport SEXP falloc(SEXP, SEXP); // RcppExport SEXP CasChar(SEXP x); RcppExport SEXP setAttributes(SEXP x, SEXP a); RcppExport void setattributes(SEXP x, SEXP a); // RcppExport SEXP CsetAttr(SEXP object, SEXP a, SEXP v); -> mot more efficeint than attr i.e. for row.names... // RcppExport void setattr(SEXP x, SEXP a, SEXP v); RcppExport SEXP duplAttributes(SEXP x, SEXP y); // RcppExport void duplattributes(SEXP x, SEXP y); // RcppExport SEXP cond_duplAttributes(SEXP x, SEXP y); RcppExport SEXP CsetAttrib(SEXP object, SEXP a); RcppExport SEXP CcopyAttrib(SEXP to, SEXP from); RcppExport SEXP CcopyMostAttrib(SEXP to, SEXP from); RcppExport SEXP copyMostAttributes(SEXP to, SEXP from); RcppExport SEXP lassign(SEXP x, SEXP s, SEXP rows, SEXP fill); RcppExport SEXP groups2GRP(SEXP x, SEXP lx, SEXP gs); RcppExport SEXP gsplit(SEXP x, SEXP gobj, SEXP toint); RcppExport SEXP greorder(SEXP x, SEXP gobj); RcppExport SEXP Cna_rm(SEXP x); RcppExport SEXP whichv(SEXP x, SEXP val, SEXP Rinvert); RcppExport SEXP anyallv(SEXP x, SEXP val, SEXP Rall); RcppExport SEXP setcopyv(SEXP x, SEXP val, SEXP rep, SEXP Rinvert, SEXP Rset, SEXP Rind1); RcppExport SEXP setop(SEXP x, SEXP val, SEXP op, SEXP roww); RcppExport SEXP vtypes(SEXP x, SEXP isnum); RcppExport SEXP vlengths(SEXP x, SEXP usenam); RcppExport SEXP multiassign(SEXP lhs, SEXP rhs, SEXP envir); RcppExport SEXP vlabels(SEXP x, SEXP attrn, SEXP usenam); RcppExport SEXP setvlabels(SEXP x, SEXP attrn, SEXP value, SEXP ind); RcppExport SEXP setnames(SEXP x, SEXP nam); RcppExport SEXP groupVec(SEXP X, SEXP starts, SEXP sizes); RcppExport SEXP groupAtVec(SEXP X, SEXP starts, SEXP naincl); RcppExport SEXP geteptr(SEXP x); // fnobs rewritten in C: RcppExport SEXP fnobsC(SEXP x, SEXP Rng, SEXP g); RcppExport SEXP fnobsmC(SEXP x, SEXP Rng, SEXP g, SEXP Rdrop); RcppExport SEXP fnobslC(SEXP x, SEXP Rng, SEXP g, SEXP Rdrop); // ffirst and flast rewritten in C: RcppExport SEXP ffirstC(SEXP x, SEXP Rng, SEXP g, SEXP Rnarm); RcppExport SEXP ffirstmC(SEXP x, SEXP Rng, SEXP g, SEXP Rnarm, SEXP Rdrop); RcppExport SEXP ffirstlC(SEXP x, SEXP Rng, SEXP g, SEXP Rnarm); RcppExport SEXP flastC(SEXP x, SEXP Rng, SEXP g, SEXP Rnarm); RcppExport SEXP flastmC(SEXP x, SEXP Rng, SEXP g, SEXP Rnarm, SEXP Rdrop); RcppExport SEXP flastlC(SEXP x, SEXP Rng, SEXP g, SEXP Rnarm); // fsum rewritten in C: RcppExport SEXP fsumC(SEXP x, SEXP Rng, SEXP g, SEXP w, SEXP Rnarm); RcppExport SEXP fsummC(SEXP x, SEXP Rng, SEXP g, SEXP w, SEXP Rnarm, SEXP Rdrop); RcppExport SEXP fsumlC(SEXP x, SEXP Rng, SEXP g, SEXP w, SEXP Rnarm, SEXP Rdrop); // fmin and fmax rewritten in C: RcppExport SEXP fminC(SEXP x, SEXP Rng, SEXP g, SEXP Rnarm); RcppExport SEXP fminmC(SEXP x, SEXP Rng, SEXP g, SEXP Rnarm, SEXP Rdrop); RcppExport SEXP fminlC(SEXP x, SEXP Rng, SEXP g, SEXP Rnarm, SEXP Rdrop); RcppExport SEXP fmaxC(SEXP x, SEXP Rng, SEXP g, SEXP Rnarm); RcppExport SEXP fmaxmC(SEXP x, SEXP Rng, SEXP g, SEXP Rnarm, SEXP Rdrop); RcppExport SEXP fmaxlC(SEXP x, SEXP Rng, SEXP g, SEXP Rnarm, SEXP Rdrop); // Added fcumsum, written in C: RcppExport SEXP fcumsumC(SEXP x, SEXP Rng, SEXP g, SEXP o, SEXP Rnarm, SEXP Rfill); RcppExport SEXP fcumsummC(SEXP x, SEXP Rng, SEXP g, SEXP o, SEXP Rnarm, SEXP Rfill); RcppExport SEXP fcumsumlC(SEXP x, SEXP Rng, SEXP g, SEXP o, SEXP Rnarm, SEXP Rfill); static const R_CMethodDef CEntries[] = { {"C_multi_yw", (DL_FUNC) &multi_yw, 10}, {NULL, NULL, 0} }; static const R_CallMethodDef CallEntries[] = { {"Cpp_BW", (DL_FUNC) &_collapse_BWCpp, 10}, {"Cpp_BWm", (DL_FUNC) &_collapse_BWmCpp, 10}, {"Cpp_BWl", (DL_FUNC) &_collapse_BWlCpp, 10}, {"Cpp_TRA", (DL_FUNC) &_collapse_TRACpp, 4}, {"Cpp_TRAm", (DL_FUNC) &_collapse_TRAmCpp, 4}, {"Cpp_TRAl", (DL_FUNC) &_collapse_TRAlCpp, 4}, {"Cpp_fndistinct", (DL_FUNC) &_collapse_fndistinctCpp, 5}, {"Cpp_fndistinctl", (DL_FUNC) &_collapse_fndistinctlCpp, 6}, {"Cpp_fndistinctm", (DL_FUNC) &_collapse_fndistinctmCpp, 6}, {"Cpp_pwnobsm", (DL_FUNC) &_collapse_pwnobsmCpp, 1}, {"C_fnobs", (DL_FUNC) &fnobsC, 3}, {"C_fnobsm", (DL_FUNC) &fnobsmC, 4}, {"C_fnobsl", (DL_FUNC) &fnobslC, 4}, {"Cpp_varying", (DL_FUNC) &_collapse_varyingCpp, 4}, {"Cpp_varyingm", (DL_FUNC) &_collapse_varyingmCpp, 5}, {"Cpp_varyingl", (DL_FUNC) &_collapse_varyinglCpp, 5}, {"Cpp_fbstats", (DL_FUNC) &_collapse_fbstatsCpp, 11}, {"Cpp_fbstatsm", (DL_FUNC) &_collapse_fbstatsmCpp, 10}, {"Cpp_fbstatsl", (DL_FUNC) &_collapse_fbstatslCpp, 10}, {"C_ffirst", (DL_FUNC) &ffirstC, 4}, {"C_ffirstm", (DL_FUNC) &ffirstmC, 5}, {"C_ffirstl", (DL_FUNC) &ffirstlC, 4}, {"Cpp_fdiffgrowth", (DL_FUNC) &_collapse_fdiffgrowthCpp, 12}, {"Cpp_fdiffgrowthm", (DL_FUNC) &_collapse_fdiffgrowthmCpp, 12}, {"Cpp_fdiffgrowthl", (DL_FUNC) &_collapse_fdiffgrowthlCpp, 12}, {"Cpp_flaglead", (DL_FUNC) &_collapse_flagleadCpp, 7}, {"Cpp_flagleadm", (DL_FUNC) &_collapse_flagleadmCpp, 7}, {"Cpp_flagleadl", (DL_FUNC) &_collapse_flagleadlCpp, 7}, {"C_flast", (DL_FUNC) &flastC, 4}, {"C_flastm", (DL_FUNC) &flastmC, 5}, {"C_flastl", (DL_FUNC) &flastlC, 4}, {"C_fmin", (DL_FUNC) &fminC, 4}, {"C_fminm", (DL_FUNC) &fminmC, 5}, {"C_fminl", (DL_FUNC) &fminlC, 5}, {"C_fmax", (DL_FUNC) &fmaxC, 4}, {"C_fmaxm", (DL_FUNC) &fmaxmC, 5}, {"C_fmaxl", (DL_FUNC) &fmaxlC, 5}, {"Cpp_fmean", (DL_FUNC) &_collapse_fmeanCpp, 6}, {"Cpp_fmeanm", (DL_FUNC) &_collapse_fmeanmCpp, 7}, {"Cpp_fmeanl", (DL_FUNC) &_collapse_fmeanlCpp, 7}, {"Cpp_fnth", (DL_FUNC) &_collapse_fnthCpp, 8}, {"Cpp_fnthm", (DL_FUNC) &_collapse_fnthmCpp, 9}, {"Cpp_fnthl", (DL_FUNC) &_collapse_fnthlCpp, 9}, {"Cpp_fmode", (DL_FUNC) &_collapse_fmodeCpp, 7}, {"Cpp_fmodel", (DL_FUNC) &_collapse_fmodelCpp, 7}, {"Cpp_fmodem", (DL_FUNC) &_collapse_fmodemCpp, 8}, {"Cpp_fprod", (DL_FUNC) &_collapse_fprodCpp, 5}, {"Cpp_fprodm", (DL_FUNC) &_collapse_fprodmCpp, 6}, {"Cpp_fprodl", (DL_FUNC) &_collapse_fprodlCpp, 6}, {"Cpp_fscale", (DL_FUNC) &_collapse_fscaleCpp, 7}, {"Cpp_fscalem", (DL_FUNC) &_collapse_fscalemCpp, 7}, {"Cpp_fscalel", (DL_FUNC) &_collapse_fscalelCpp, 7}, {"C_fsum", (DL_FUNC) &fsumC, 5}, {"C_fsumm", (DL_FUNC) &fsummC, 6}, {"C_fsuml", (DL_FUNC) &fsumlC, 6}, {"Cpp_fvarsd", (DL_FUNC) &_collapse_fvarsdCpp, 8}, {"Cpp_fvarsdm", (DL_FUNC) &_collapse_fvarsdmCpp, 9}, {"Cpp_fvarsdl", (DL_FUNC) &_collapse_fvarsdlCpp, 9}, {"Cpp_mrtl", (DL_FUNC) &_collapse_mrtl, 3}, {"Cpp_mctl", (DL_FUNC) &_collapse_mctl, 3}, {"Cpp_psmat", (DL_FUNC) &_collapse_psmatCpp, 4}, {"Cpp_qF", (DL_FUNC) &_collapse_qFCpp, 5}, // {"Cpp_qG", (DL_FUNC) &_collapse_qGCpp, 5}, {"Cpp_funique", (DL_FUNC) &_collapse_funiqueCpp, 2}, {"Cpp_fdroplevels", (DL_FUNC) &_collapse_fdroplevelsCpp, 2}, {"C_setAttributes", (DL_FUNC) &setAttributes, 2}, {"C_setattributes", (DL_FUNC) &setattributes, 2}, // {"C_setAttr", (DL_FUNC) &CsetAttr, 3}, // {"C_setattr", (DL_FUNC) &setattr, 3}, {"C_duplAttributes", (DL_FUNC) &duplAttributes, 2}, // {"C_duplattributes", (DL_FUNC) &duplattributes, 2}, // {"C_cond_duplAttributes", (DL_FUNC) &cond_duplAttributes, 2}, {"C_copyMostAttributes", (DL_FUNC) ©MostAttributes, 2}, // {"C_cond_duplattributes", (DL_FUNC) &cond_duplattributes, 2}, {"C_setAttrib", (DL_FUNC) &CsetAttrib, 2}, {"C_copyAttrib", (DL_FUNC) &CcopyAttrib, 2}, {"C_copyMostAttrib", (DL_FUNC) &CcopyMostAttrib, 2}, {"C_groups2GRP", (DL_FUNC) &groups2GRP, 3}, {"C_gsplit", (DL_FUNC) &gsplit, 3}, {"C_greorder", (DL_FUNC) &greorder, 2}, {"C_lassign", (DL_FUNC) &lassign, 4}, {"Cpp_seqid", (DL_FUNC) &_collapse_seqid, 7}, {"Cpp_groupid", (DL_FUNC) &_collapse_groupid, 5}, {"C_collapse_init", (DL_FUNC) &collapse_init, 1}, {"C_dt_na", (DL_FUNC) &dt_na, 2}, {"C_allNA", (DL_FUNC) &allNAv, 2}, {"C_na_rm", (DL_FUNC) &Cna_rm, 1}, {"C_whichv", (DL_FUNC) &whichv, 3}, {"C_anyallv", (DL_FUNC) &anyallv, 3}, {"C_setcopyv", (DL_FUNC) &setcopyv, 6}, {"C_setop", (DL_FUNC) &setop, 4}, {"C_vtypes", (DL_FUNC) &vtypes, 2}, {"C_vlengths", (DL_FUNC) &vlengths, 2}, {"C_multiassign", (DL_FUNC) &multiassign, 3}, {"C_vlabels", (DL_FUNC) &vlabels, 3}, {"C_setvlabels", (DL_FUNC) &setvlabels, 4}, {"C_setnames", (DL_FUNC) &setnames, 2}, {"C_group", (DL_FUNC) &groupVec, 3}, {"C_groupat", (DL_FUNC) &groupAtVec, 3}, {"C_radixsort", (DL_FUNC) &Cradixsort, 6}, {"C_frankds", (DL_FUNC) &frankds, 4}, {"C_pacf1", (DL_FUNC) &pacf1, 2}, {"C_rbindlist", (DL_FUNC) &rbindlist, 4}, {"C_setcolorder", (DL_FUNC) &setcolorder, 2}, {"C_subsetCols", (DL_FUNC) &subsetCols, 3}, {"C_alloc", (DL_FUNC) &falloc, 2}, {"C_geteptr", (DL_FUNC) &geteptr, 1}, // {"C_aschar", (DL_FUNC) &CasChar, 1}, {"C_subsetDT", (DL_FUNC) &subsetDT, 4}, {"C_subsetVector", (DL_FUNC) &subsetVector, 3}, {"C_alloccol", (DL_FUNC) &Calloccol, 1}, {"C_fcumsum", (DL_FUNC) &fcumsumC, 6}, {"C_fcumsumm", (DL_FUNC) &fcumsummC, 6}, {"C_fcumsuml", (DL_FUNC) &fcumsumlC, 6}, {NULL, NULL, 0} }; RcppExport void R_init_collapse(DllInfo *dll) { R_registerRoutines(dll, CEntries, CallEntries, NULL, NULL); R_useDynamicSymbols(dll, FALSE); R_forceSymbols(dll, TRUE); } collapse/src/fdiff_fgrowth.cpp0000644000176200001440000024241114174223734016200 0ustar liggesusers// [[Rcpp::plugins(cpp11)]] #include using namespace Rcpp; // Note: Now taking logs in R -> Faster and smaller compiled code !! /* some systems get this wrong, possibly depend on what libs are loaded */ // static inline double R_log(double x) { // return x > 0 ? log(x) : x == 0 ? R_NegInf : R_NaN; // } // new setup: ret = 1L - differences, ret = 2L - log differences, ret = 3L - exact growth rates, ret = 4L - log-difference growth // also: if rho != 1, quasi-differencing and log differencing with rho... i.e. for cochrane-orcutt regression // This Approach: Hybrid: Currently does not support iterated differences on irregular time-series and panel data !! // TODO: Make comprehensive... template NumericVector fdiffgrowthCppImpl(const NumericVector& x, const IntegerVector& n = 1, const IntegerVector& diff = 1, double fill = NA_REAL, int ng = 0, const IntegerVector& g = 0, const SEXP& gs = R_NilValue, const SEXP& t = R_NilValue, std::string stub = "", bool names = true, F FUN = [](double y, double x) { return y-x; }) { int l = x.size(), ns = n.size(), ds = diff.size(), zeros = 0, pos = INT_MAX; IntegerVector absn = no_init_vector(ns); for(int i = ns; i--; ) { if(n[i] == pos) stop("duplicated values in n detected"); // because one might mistakenly pass a factor to the n-slot pos = n[i]; if(pos == 0) ++zeros; if(pos < 0) { if(pos == NA_INTEGER) stop("NA in n"); absn[i] = -pos; } else absn[i] = pos; } pos = 0; std::string stub2 = names ? "F" + stub : ""; int ncol = (ns-zeros)*ds+zeros; if(ncol == 1) names = false; NumericMatrix out = no_init_matrix(l, ncol); CharacterVector colnam = names ? no_init_vector(ncol) : no_init_vector(1); CharacterVector nc = names ? Rf_coerceVector(absn, STRSXP) : NA_STRING; CharacterVector diffc = names ? Rf_coerceVector(diff, STRSXP) : NA_STRING; if(ng == 0) { // No groups if(Rf_isNull(t)) { // Ordered data for(int p = 0; p != ns; ++p) { int np = n[p]; if(np>0) { // Positive lagged and iterated differences int d1 = diff[0], end = np*d1; bool L1 = np == 1; if(d1 < 1) stop("diff must be a vector of integers > 0"); if(end >= l) stop("n * diff needs to be < length(x)"); NumericMatrix::Column outp = out( _ , pos); if(names) { if(L1) colnam[pos] = stub + diffc[0]; else colnam[pos] = "L" + nc[p] + stub + diffc[0]; } ++pos; for(int i = np; i != l; ++i) outp[i] = FUN(x[i], x[i - np]); if(d1 > 1) for(int k = 1; k != d1; ++k) { int start = np*(k+1)-1; for(int i = l-1; i != start; --i) outp[i] = FUN(outp[i], outp[i - np]); } for(int i = end; i--; ) outp[i] = fill; if(ds > 1) { NumericVector outtemp = outp; for(int q = 1; q != ds; ++q) { int dq = diff[q], L_dq = diff[q-1], end = np*dq; if(end >= l) stop("n * diff needs to be < length(x)"); if(dq <= L_dq) stop("differences must be passed in ascending order"); for(int k = L_dq; k != dq; ++k) { int start = np*(k+1)-1; for(int i = l-1; i != start; --i) outtemp[i] = FUN(outtemp[i], outtemp[i - np]); } for(int i = np*L_dq; i != end; ++i) outtemp[i] = fill; out( _ , pos) = outtemp; if(names) { if(L1) colnam[pos] = stub + diffc[q]; else colnam[pos] = "L" + nc[p] + stub + diffc[q]; } ++pos; } } } else if(np<0) { // (Negative) leaded and iterated differences int d1 = diff[0], end = l+np*d1; bool F1 = np == -1; if(d1 < 1) stop("diff must be a vector of integers > 0"); if(end <= 0) stop("abs(n * diff) needs to be < length(x)"); NumericMatrix::Column outp = out( _ , pos); if(names) { if(F1) colnam[pos] = stub2 + diffc[0]; else colnam[pos] = "F" + nc[p] + stub + diffc[0]; } ++pos; for(int i = l+np; i--; ) outp[i] = FUN(x[i], x[i - np]); if(d1 > 1) for(int k = 1; k != d1; ++k) { int final = l+np*(k+1); for(int i = 0; i != final; ++i) outp[i] = FUN(outp[i], outp[i - np]); } for(int i = end; i != l; ++i) outp[i] = fill; if(ds > 1) { NumericVector outtemp = outp; for(int q = 1; q != ds; ++q) { int dq = diff[q], L_dq = diff[q-1], end = l+np*dq, start = l+np*L_dq; if(end <= 0) stop("abs(n * diff) needs to be < length(x)"); if(dq <= L_dq) stop("differences must be passed in ascending order"); for(int k = L_dq; k != dq; ++k) { int final = l+np*(k+1); for(int i = 0; i != final; ++i) outtemp[i] = FUN(outtemp[i], outtemp[i - np]); } for(int i = end; i != start; ++i) outtemp[i] = fill; out( _ , pos) = outtemp; if(names) { if(F1) colnam[pos] = stub2 + diffc[q]; else colnam[pos] = "F"+ nc[p] + stub + diffc[q]; } ++pos; } } } else { out( _ , pos) = x; if(names) colnam[pos] = "--"; ++pos; } } } else { // Unordered data: Timevar provided IntegerVector ord = t; if(l != ord.size()) stop("length(x) must match length(t)"); int min = INT_MAX, max = INT_MIN, osize, temp; for(int i = 0; i != l; ++i) { if(ord[i] < min) min = ord[i]; if(ord[i] > max) max = ord[i]; } if(min == NA_INTEGER) stop("Timevar contains missing values"); osize = max-min+1; bool regular = osize == l; IntegerVector omap(osize), ord2 = regular ? no_init_vector(1) : no_init_vector(l); if(!regular) { // Irregular time series if(osize > 10000000 && osize > 3 * l) warning("Your time series is very irregular. Need to create an internal ordering vector of length %s to represent it.", osize); if(Rcpp::max(diff) > 1) stop("Iterations are currently only supported for regular time series. See ?seqid to identify the regular sequences in your time series, or just apply this function multiple times."); for(int i = 0; i != l; ++i) { temp = ord[i] - min; // Best ? Or direct assign to ord2[i] ? Also check for panel version.. if(omap[temp]) stop("Repeated values in timevar"); omap[temp] = i+1; ord2[i] = temp; } } else { // Regular time series for(int i = 0; i != l; ++i) { temp = ord[i] - min; if(omap[temp]) stop("Repeated values in timevar"); omap[temp] = i; } } for(int p = 0; p != ns; ++p) { int np = n[p]; if(np>0) { // Positive lagged and iterated differences int d1 = diff[0], end = np*d1; bool L1 = np == 1; if(d1 < 1) stop("diff must be a vector of integers > 0"); if(end >= l) stop("n * diff needs to be < length(x)"); NumericMatrix::Column outp = out( _ , pos); if(names) { if(L1) colnam[pos] = stub + diffc[0]; else colnam[pos] = "L" + nc[p] + stub + diffc[0]; } ++pos; if(regular) { for(int i = np; i != l; ++i) outp[omap[i]] = FUN(x[omap[i]], x[omap[i - np]]); } else { for(int i = 0; i != l; ++i) { // Smarter solution using while ??? if(ord2[i] >= np && (temp = omap[ord2[i] - np])) { outp[i] = FUN(x[i], x[temp-1]); } else { outp[i] = fill; } } } if(d1 > 1) for(int k = 1; k != d1; ++k) { int start = np*(k+1)-1; for(int i = l-1; i != start; --i) outp[omap[i]] = FUN(outp[omap[i]], outp[omap[i - np]]); } if(regular) for(int i = end; i--; ) outp[omap[i]] = fill; if(ds > 1) { NumericVector outtemp = outp; for(int q = 1; q != ds; ++q) { int dq = diff[q], L_dq = diff[q-1], end = np*dq; if(end >= l) stop("n * diff needs to be < length(x)"); if(dq <= L_dq) stop("differences must be passed in ascending order"); for(int k = L_dq; k != dq; ++k) { int start = np*(k+1)-1; for(int i = l-1; i != start; --i) outtemp[omap[i]] = FUN(outtemp[omap[i]], outtemp[omap[i - np]]); } for(int i = np*L_dq; i != end; ++i) outtemp[omap[i]] = fill; out( _ , pos) = outtemp; if(names) { if(L1) colnam[pos] = stub + diffc[q]; else colnam[pos] = "L" + nc[p] + stub + diffc[q]; } ++pos; } } } else if(np<0) { // (Negative) leaded and iterated differences int d1 = diff[0], end = l+np*d1; bool F1 = np == -1; if(d1 < 1) stop("diff must be a vector of integers > 0"); if(end <= 0) stop("abs(n * diff) needs to be < length(x)"); NumericMatrix::Column outp = out( _ , pos); if(names) { if(F1) colnam[pos] = stub2 + diffc[0]; else colnam[pos] = "F" + nc[p] + stub + diffc[0]; } ++pos; if(regular) { for(int i = l+np; i--; ) outp[omap[i]] = FUN(x[omap[i]], x[omap[i - np]]); } else { for(int i = 0, osnp = osize + np; i != l; ++i) { // Smarter solution using while ??? if(ord2[i] < osnp && (temp = omap[ord2[i] - np])) { outp[i] = FUN(x[i], x[temp-1]); } else { outp[i] = fill; } } } if(d1 > 1) for(int k = 1; k != d1; ++k) { int final = l+np*(k+1); for(int i = 0; i != final; ++i) outp[omap[i]] = FUN(outp[omap[i]], outp[omap[i - np]]); } if(regular) for(int i = end; i != l; ++i) outp[omap[i]] = fill; if(ds > 1) { NumericVector outtemp = outp; for(int q = 1; q != ds; ++q) { int dq = diff[q], L_dq = diff[q-1], end = l+np*dq, start = l+np*L_dq; if(end <= 0) stop("abs(n * diff) needs to be < length(x)"); if(dq <= L_dq) stop("differences must be passed in ascending order"); for(int k = L_dq; k != dq; ++k) { int final = l+np*(k+1); for(int i = 0; i != final; ++i) outtemp[omap[i]] = FUN(outtemp[omap[i]], outtemp[omap[i - np]]); } for(int i = end; i != start; ++i) outtemp[omap[i]] = fill; out( _ , pos) = outtemp; if(names) { if(F1) colnam[pos] = stub2 + diffc[q]; else colnam[pos] = "F"+ nc[p] + stub + diffc[q]; } ++pos; } } } else { out( _ , pos) = x; if(names) colnam[pos] = "--"; ++pos; } } } } else { if(l != g.size()) stop("length(x) must match length(g)"); int ags = l/ng, ngp = ng+1, maxdiff = max(diff); if(Rf_isNull(t)) { bool cond = !Rf_isNull(gs); IntegerVector gsv = (cond || maxdiff == 1) ? no_init_vector(1) : IntegerVector(ng); int *pgsv = cond ? INTEGER(gs)-1 : INTEGER(gsv)-1; if(maxdiff != 1) { if(cond) { if(ng != Rf_length(gs)) stop("ng must match length(gs)"); } else { for(int i = 0; i != l; ++i) ++pgsv[g[i]]; } } // int seen[ngp], memsize = sizeof(int)*(ngp); for(int p = 0; p != ns; ++p) { int np = n[p]; if(absn[p]*maxdiff > ags) warning("abs(n * diff) exceeds average group-size (%i). This could also be a result of unused factor levels. See #25. Use fdroplevels() to remove unused factor levels from your data.", ags); if(np>0) { // Positive lagged and iterated differences int d1 = diff[0]; bool L1 = np == 1; if(d1 < 1) stop("diff must be a vector of integers > 0"); NumericMatrix::Column outp = out( _ , pos); std::vector seen(ngp); // memset(seen, 0, memsize); if(names) { if(L1) colnam[pos] = stub + diffc[0]; else colnam[pos] = "L" + nc[p] + stub + diffc[0]; } ++pos; for(int i = 0; i != l; ++i) { if(seen[g[i]] == np) outp[i] = FUN(x[i], x[i - np]); else { outp[i] = fill; ++seen[g[i]]; } } if(d1 > 1) for(int k = 1; k != d1; ++k) { int start = np*(k+1); std::vector seen(ngp); // memset(seen, 0, memsize); for(int i = l; i--; ) { if(seen[g[i]] == pgsv[g[i]]-start) outp[i] = fill; else { outp[i] = FUN(outp[i], outp[i - np]); ++seen[g[i]]; } } } if(ds > 1) { NumericVector outtemp = outp; for(int q = 1; q != ds; ++q) { int dq = diff[q], L_dq = diff[q-1]; if(dq <= L_dq) stop("differences must be passed in ascending order"); for(int k = L_dq; k != dq; ++k) { int start = np*(k+1); // Right ? -> seems so std::vector seen(ngp); // memset(seen, 0, memsize); // Needed, because it loops from the beginning for(int i = l; i--; ) { if(seen[g[i]] == pgsv[g[i]]-start) outtemp[i] = fill; else { outtemp[i] = FUN(outtemp[i], outtemp[i - np]); ++seen[g[i]]; } } } out( _ , pos) = outtemp; if(names) { if(L1) colnam[pos] = stub + diffc[q]; else colnam[pos] = "L" + nc[p] + stub + diffc[q]; } ++pos; } } } else if(np<0) { // (Negative) leaded and iterated differences int d1 = diff[0]; bool F1 = np == -1; if(d1 < 1) stop("diff must be a vector of integers > 0"); NumericMatrix::Column outp = out( _ , pos); std::vector seen(ngp); // memset(seen, 0, memsize); if(names) { if(F1) colnam[pos] = stub2 + diffc[0]; else colnam[pos] = "F" + nc[p] + stub + diffc[0]; } ++pos; for(int i = l; i--; ) { if(seen[g[i]] == np) outp[i] = FUN(x[i], x[i - np]); else { outp[i] = fill; --seen[g[i]]; } } if(d1 > 1) for(int k = 1; k != d1; ++k) { int start = np*(k+1); std::vector seen(ngp); // memset(seen, 0, memsize); for(int i = 0; i != l; ++i) { if(seen[g[i]] == pgsv[g[i]]+start) outp[i] = fill; else { outp[i] = FUN(outp[i], outp[i - np]); ++seen[g[i]]; } } } if(ds > 1) { NumericVector outtemp = outp; for(int q = 1; q != ds; ++q) { int dq = diff[q], L_dq = diff[q-1]; if(dq <= L_dq) stop("differences must be passed in ascending order"); for(int k = L_dq; k != dq; ++k) { int start = np*(k+1); std::vector seen(ngp); // memset(seen, 0, memsize); for(int i = 0; i != l; ++i) { if(seen[g[i]] == pgsv[g[i]]+start) outtemp[i] = fill; else { outtemp[i] = FUN(outtemp[i], outtemp[i - np]); ++seen[g[i]]; } } } out( _ , pos) = outtemp; if(names) { if(F1) colnam[pos] = stub2 + diffc[q]; else colnam[pos] = "F"+ nc[p] + stub + diffc[q]; } ++pos; } } } else { out( _ , pos) = x; if(names) colnam[pos] = "--"; ++pos; } } } else { // Unordered data: Timevar Provided IntegerVector ord = t; int temp; if(l != ord.size()) stop("length(x) must match length(t)"); IntegerVector min(ngp, INT_MAX), max(ngp, INT_MIN), cgs = no_init_vector(ngp); for(int i = 0; i != l; ++i) { temp = g[i]; if(ord[i] < min[temp]) min[temp] = ord[i]; if(ord[i] > max[temp]) max[temp] = ord[i]; } temp = 0; for(int i = 1; i != ngp; ++i) { if(min[i] == NA_INTEGER) stop("Timevar contains missing values"); if(min[i] == INT_MAX) continue; // Needed in case of unused factor levels (group vector too large) cgs[i] = temp; // This needs to b here (for unused factor levels case...) max[i] -= min[i] - 1; // need max[i] which stores the complete group sizes only if p<0 e.g. if computing leads.. temp += max[i]; } // omap provides the ordering to order the vector (needed to find previous / next values) bool regular = temp == l; IntegerVector omap(temp), ord2 = no_init_vector(l); if(!regular) { // Irregular panel if(temp > 10000000 && temp > 3 * l) warning("Your panel is very irregular. Need to create an internal ordering vector of length %s to represent it.", temp); if(maxdiff > 1) stop("Iterations are currently only supported for regular panels. See ?seqid to identify the regular sequences in your panel, or just apply this function multiple times."); for(int i = 0; i != l; ++i) { ord2[i] = ord[i] - min[g[i]]; temp = cgs[g[i]] + ord2[i]; if(omap[temp]) stop("Repeated values of timevar within one or more groups"); omap[temp] = i+1; // needed to add 1 to distinguish between 0 and gap } } else { // Regular panel for(int i = 0; i != l; ++i) { ord2[i] = ord[i] - min[g[i]]; temp = cgs[g[i]] + ord2[i]; if(omap[temp]) stop("Repeated values of timevar within one or more groups"); omap[temp] = i; } } for(int p = 0; p != ns; ++p) { int np = n[p]; if(absn[p]*maxdiff > ags) warning("abs(n * diff) exceeds average group-size (%i). This could also be a result of unused factor levels. See #25. Use fdroplevels() to remove unused factor levels from your data.", ags); if(np>0) { // Positive lagged and iterated differences int d1 = diff[0]; bool L1 = np == 1; if(d1 < 1) stop("diff must be a vector of integers > 0"); NumericMatrix::Column outp = out( _ , pos); if(names) { if(L1) colnam[pos] = stub + diffc[0]; else colnam[pos] = "L" + nc[p] + stub + diffc[0]; } ++pos; if(regular) { for(int i = 0; i != l; ++i) { if(ord2[i] >= np) { outp[i] = FUN(x[i], x[omap[cgs[g[i]]+ord2[i]-np]]); } else { outp[i] = fill; } } } else { for(int i = 0; i != l; ++i) { if(ord2[i] >= np && (temp = omap[cgs[g[i]]+ord2[i]-np])) { outp[i] = FUN(x[i], x[temp-1]); } else { outp[i] = fill; } } } if(d1 > 1) for(int k = 1; k != d1; ++k) { int start = np*(k+1); std::vector seen(ngp); // memset(seen, 0, memsize); for(int i = l; i--; ) { if(seen[g[omap[i]]] == max[g[omap[i]]]-start) outp[omap[i]] = fill; else { outp[omap[i]] = FUN(outp[omap[i]], outp[omap[i - np]]); ++seen[g[omap[i]]]; } } } if(ds > 1) { NumericVector outtemp = outp; for(int q = 1; q != ds; ++q) { int dq = diff[q], L_dq = diff[q-1]; if(dq <= L_dq) stop("differences must be passed in ascending order"); for(int k = L_dq; k != dq; ++k) { int start = np*(k+1); std::vector seen(ngp); // memset(seen, 0, memsize); for(int i = l; i--; ) { if(seen[g[omap[i]]] == max[g[omap[i]]]-start) outtemp[omap[i]] = fill; else { outtemp[omap[i]] = FUN(outtemp[omap[i]], outtemp[omap[i - np]]); ++seen[g[omap[i]]]; } } } out( _ , pos) = outtemp; if(names) { if(L1) colnam[pos] = stub + diffc[q]; else colnam[pos] = "L" + nc[p] + stub + diffc[q]; } ++pos; } } } else if(np<0) { // (Negative) leaded and iterated differences int d1 = diff[0]; bool F1 = np == -1; if(d1 < 1) stop("diff must be a vector of integers > 0"); NumericMatrix::Column outp = out( _ , pos); if(names) { if(F1) colnam[pos] = stub2 + diffc[0]; else colnam[pos] = "F" + nc[p] + stub + diffc[0]; } ++pos; if(regular) { for(int i = 0; i != l; ++i) { if(ord2[i] < max[g[i]]+np) { outp[i] = FUN(x[i], x[omap[cgs[g[i]]+ord2[i]-np]]); } else { outp[i] = fill; } } } else { for(int i = 0; i != l; ++i) { // Smarter solution using while ??? if(ord2[i] < max[g[i]]+np && (temp = omap[cgs[g[i]]+ord2[i]-np])) { outp[i] = FUN(x[i], x[temp-1]); } else { outp[i] = fill; } } } if(d1 > 1) for(int k = 1; k != d1; ++k) { int start = np*(k+1); std::vector seen(ngp); // memset(seen, 0, memsize); for(int i = 0; i != l; ++i) { if(seen[g[omap[i]]] == max[g[omap[i]]]+start) outp[omap[i]] = fill; else { outp[omap[i]] = FUN(outp[omap[i]], outp[omap[i - np]]); ++seen[g[omap[i]]]; } } } if(ds > 1) { NumericVector outtemp = outp; for(int q = 1; q != ds; ++q) { int dq = diff[q], L_dq = diff[q-1]; if(dq <= L_dq) stop("differences must be passed in ascending order"); for(int k = L_dq; k != dq; ++k) { int start = np*(k+1); std::vector seen(ngp); // memset(seen, 0, memsize); for(int i = 0; i != l; ++i) { if(seen[g[omap[i]]] == max[g[omap[i]]]+start) outtemp[omap[i]] = fill; else { outtemp[omap[i]] = FUN(outtemp[omap[i]], outtemp[omap[i - np]]); ++seen[g[omap[i]]]; } } } out( _ , pos) = outtemp; if(names) { if(F1) colnam[pos] = stub2 + diffc[q]; else colnam[pos] = "F"+ nc[p] + stub + diffc[q]; } ++pos; } } } else { out( _ , pos) = x; if(names) colnam[pos] = "--"; ++pos; } } } } // Previous Version // if(ncol == 1) SHALLOW_DUPLICATE_ATTRIB(out, x); // else if(names) out.attr("dimnames") = List::create(x.attr("names"), colnam); SHALLOW_DUPLICATE_ATTRIB(out, x); if(ncol != 1) { Rf_setAttrib(out, R_NamesSymbol, R_NilValue); // if(x.hasAttribute("names")) out.attr("names") = R_NilValue; Rf_dimgets(out, Dimension(l, ncol)); if(Rf_isObject(x)) { // && !x.inherits("pseries") -> lag matrix in plm is not a pseries anymore anyway... CharacterVector classes = Rf_getAttrib(out, R_ClassSymbol); classes.push_back("matrix"); Rf_classgets(out, classes); } // else { // Rf_classgets(out, Rf_mkString("matrix")); // } if(names) Rf_dimnamesgets(out, List::create(Rf_getAttrib(x, R_NamesSymbol), colnam)); } return out; } // [[Rcpp::export]] NumericVector fdiffgrowthCpp(const NumericVector& x, const IntegerVector& n = 1, const IntegerVector& diff = 1, double fill = NA_REAL, int ng = 0, const IntegerVector& g = 0, const SEXP& gs = R_NilValue, const SEXP& t = R_NilValue, int ret = 1, double rho = 1, bool names = true, double power = 1) { std::string stub; if(ret < 4) { double rho2; if(ret == 3) { rho2 = 1; if(power != 1) stop("High-powered log-difference growth rates are currently not supported"); if(names) stub = "Dlog"; } else { rho2 = rho; if(names) stub = (ret == 1 && rho == 1) ? "D" : (ret == 1) ? "QD" : (rho == 1) ? "Dlog" : "QDlog"; // QD for quasi-differences } return fdiffgrowthCppImpl(x, n, diff, fill, ng, g, gs, t, stub, names, [rho2](double y, double x) { return y-rho2*x; }); // return y-x; same efficiency as return y-rho*x; when rho = 1 -> smart compiler !, and reduced file size !! } else if (ret == 4) { if(names) stub = "G"; // same speed as fixing 100 ! Faster using (y/x-1)*rho or (x*(1/x)-1)*rho ? if(power == 1) return fdiffgrowthCppImpl(x, n, diff, fill, ng, g, gs, t, stub, names, [rho](double y, double x) { return (y-x)*(rho/x); }); // definitely much faster !! return fdiffgrowthCppImpl(x, n, diff, fill, ng, g, gs, t, stub, names, [rho, power](double y, double x) { return (pow(y/x, power)-1)*rho; }); // without: 375 kb } else stop("Unknown return option!"); } inline SEXP coln_check(SEXP x) { return Rf_isNull(x) ? NA_STRING : x; } template NumericMatrix fdiffgrowthmCppImpl(const NumericMatrix& x, const IntegerVector& n = 1, const IntegerVector& diff = 1, double fill = NA_REAL, int ng = 0, const IntegerVector& g = 0, const SEXP& gs = R_NilValue, const SEXP& t = R_NilValue, std::string stub = "", bool names = true, F FUN = [](double y, double x) { return y-x; }) { int l = x.nrow(), col = x.ncol(), ns = n.size(), ds = diff.size(), zeros = 0, pos = INT_MAX; IntegerVector absn = no_init_vector(ns); for(int i = ns; i--; ) { if(n[i] == pos) stop("duplicated values in n detected"); // because one might mistakenly pass a factor to the n-slot pos = n[i]; if(pos == 0) ++zeros; if(pos < 0) { if(pos == NA_INTEGER) stop("NA in n"); absn[i] = -pos; } else absn[i] = pos; } pos = 0; std::string stub2 = names ? "F" + stub : ""; int ncol = ((ns-zeros)*ds+zeros)*col; NumericMatrix out = no_init_matrix(l, ncol); CharacterVector colnam = names ? no_init_vector(ncol) : no_init_vector(1); CharacterVector nc = names ? Rf_coerceVector(absn, STRSXP) : NA_STRING; CharacterVector diffc = names ? Rf_coerceVector(diff, STRSXP) : NA_STRING; CharacterVector coln = names ? coln_check(colnames(x)) : NA_STRING; if(names && coln[0] == NA_STRING) names = false; if(ng == 0) { // No groups if(Rf_isNull(t)) { // Ordered data for(int j = 0; j != col; ++j) { NumericMatrix::ConstColumn column = x( _ , j); for(int p = 0; p != ns; ++p) { int np = n[p]; if(np>0) { // Positive lagged and iterated differences int d1 = diff[0], end = np*d1; bool L1 = np == 1; if(d1 < 1) stop("diff must be a vector of integers > 0"); if(end >= l) stop("n * diff needs to be < nrow(x)"); NumericMatrix::Column outp = out( _ , pos); if(names) { if(L1) colnam[pos] = stub + diffc[0] + "." + coln[j]; else colnam[pos] = "L" + nc[p] + stub + diffc[0] + "." + coln[j]; } ++pos; for(int i = np; i != l; ++i) outp[i] = FUN(column[i], column[i - np]); if(d1 > 1) for(int k = 1; k != d1; ++k) { int start = np*(k+1)-1; for(int i = l-1; i != start; --i) outp[i] = FUN(outp[i], outp[i - np]); } for(int i = end; i--; ) outp[i] = fill; if(ds > 1) { NumericVector outtemp = outp; for(int q = 1; q != ds; ++q) { int dq = diff[q], L_dq = diff[q-1], end = np*dq; if(end >= l) stop("n * diff needs to be < nrow(x)"); if(dq <= L_dq) stop("differences must be passed in ascending order"); for(int k = L_dq; k != dq; ++k) { int start = np*(k+1)-1; for(int i = l-1; i != start; --i) outtemp[i] = FUN(outtemp[i], outtemp[i - np]); } for(int i = np*L_dq; i != end; ++i) outtemp[i] = fill; out( _ , pos) = outtemp; if(names) { if(L1) colnam[pos] = stub + diffc[q] + "." + coln[j]; else colnam[pos] = "L" + nc[p] + stub + diffc[q] + "." + coln[j]; } ++pos; } } } else if(np<0) { // (Negative) leaded and iterated differences int d1 = diff[0], end = l+np*d1; bool F1 = np == -1; if(d1 < 1) stop("diff must be a vector of integers > 0"); if(end <= 0) stop("abs(n * diff) needs to be < nrow(x)"); NumericMatrix::Column outp = out( _ , pos); if(names) { if(F1) colnam[pos] = stub2 + diffc[0] + "." + coln[j]; else colnam[pos] = "F" + nc[p] + stub + diffc[0] + "." + coln[j]; } ++pos; for(int i = l+np; i--; ) outp[i] = FUN(column[i], column[i - np]); if(d1 > 1) for(int k = 1; k != d1; ++k) { int final = l+np*(k+1); for(int i = 0; i != final; ++i) outp[i] = FUN(outp[i], outp[i - np]); } for(int i = end; i != l; ++i) outp[i] = fill; if(ds > 1) { NumericVector outtemp = outp; for(int q = 1; q != ds; ++q) { int dq = diff[q], L_dq = diff[q-1], end = l+np*dq, start = l+np*L_dq; if(end <= 0) stop("abs(n * diff) needs to be < nrow(x)"); if(dq <= L_dq) stop("differences must be passed in ascending order"); for(int k = L_dq; k != dq; ++k) { int final = l+np*(k+1); for(int i = 0; i != final; ++i) outtemp[i] = FUN(outtemp[i], outtemp[i - np]); } for(int i = end; i != start; ++i) outtemp[i] = fill; out( _ , pos) = outtemp; if(names) { if(F1) colnam[pos] = stub2 + diffc[q] + "." + coln[j]; else colnam[pos] = "F"+ nc[p] + stub + diffc[q] + "." + coln[j]; } ++pos; } } } else { out( _ , pos) = column; if(names) colnam[pos] = coln[j]; ++pos; } } } } else { // Unordered data: Timevar provided IntegerVector ord = t; if(l != ord.size()) stop("length(x) must match length(t)"); int min = INT_MAX, max = INT_MIN, osize, temp; for(int i = 0; i != l; ++i) { if(ord[i] < min) min = ord[i]; if(ord[i] > max) max = ord[i]; } if(min == NA_INTEGER) stop("Timevar contains missing values"); osize = max-min+1; bool regular = osize == l; IntegerVector omap(osize), ord2 = regular ? no_init_vector(1) : no_init_vector(l); if(!regular) { // Irregular time series if(osize > 10000000 && osize > 3 * l) warning("Your time series is very irregular. Need to create an internal ordering vector of length %s to represent it.", osize); if(Rcpp::max(diff) > 1) stop("Iterations are currently only supported for regular time series. See ?seqid to identify the regular sequences in your time series, or just apply this function multiple times."); for(int i = 0; i != l; ++i) { temp = ord[i] - min; // Best ? Or direct assign to ord2[i] ? Also check for panel version.. if(omap[temp]) stop("Repeated values in timevar"); omap[temp] = i+1; ord2[i] = temp; } } else { // Regular time series for(int i = 0; i != l; ++i) { temp = ord[i] - min; if(omap[temp]) stop("Repeated values in timevar"); omap[temp] = i; } } for(int j = 0; j != col; ++j) { NumericMatrix::ConstColumn column = x( _ , j); for(int p = 0; p != ns; ++p) { int np = n[p]; if(np>0) { // Positive lagged and iterated differences int d1 = diff[0], end = np*d1; bool L1 = np == 1; if(d1 < 1) stop("diff must be a vector of integers > 0"); if(end >= l) stop("n * diff needs to be < nrow(x)"); NumericMatrix::Column outp = out( _ , pos); if(names) { if(L1) colnam[pos] = stub + diffc[0] + "." + coln[j]; else colnam[pos] = "L" + nc[p] + stub + diffc[0] + "." + coln[j]; } ++pos; if(regular) { for(int i = np; i != l; ++i) outp[omap[i]] = FUN(column[omap[i]], column[omap[i - np]]); } else { for(int i = 0; i != l; ++i) { // Smarter solution using while ??? if(ord2[i] >= np && (temp = omap[ord2[i] - np])) { outp[i] = FUN(column[i], column[temp-1]); } else { outp[i] = fill; } } } if(d1 > 1) for(int k = 1; k != d1; ++k) { int start = np*(k+1)-1; for(int i = l-1; i != start; --i) outp[omap[i]] = FUN(outp[omap[i]], outp[omap[i - np]]); } if(regular) for(int i = end; i--; ) outp[omap[i]] = fill; if(ds > 1) { NumericVector outtemp = outp; for(int q = 1; q != ds; ++q) { int dq = diff[q], L_dq = diff[q-1], end = np*dq; if(end >= l) stop("n * diff needs to be < nrow(x)"); if(dq <= L_dq) stop("differences must be passed in ascending order"); for(int k = L_dq; k != dq; ++k) { int start = np*(k+1)-1; for(int i = l-1; i != start; --i) outtemp[omap[i]] = FUN(outtemp[omap[i]], outtemp[omap[i - np]]); } for(int i = np*L_dq; i != end; ++i) outtemp[omap[i]] = fill; out( _ , pos) = outtemp; if(names) { if(L1) colnam[pos] = stub + diffc[q] + "." + coln[j]; else colnam[pos] = "L" + nc[p] + stub + diffc[q] + "." + coln[j]; } ++pos; } } } else if(np<0) { // (Negative) leaded and iterated differences int d1 = diff[0], end = l+np*d1; bool F1 = np == -1; if(d1 < 1) stop("diff must be a vector of integers > 0"); if(end <= 0) stop("abs(n * diff) needs to be < nrow(x)"); NumericMatrix::Column outp = out( _ , pos); if(names) { if(F1) colnam[pos] = stub2 + diffc[0] + "." + coln[j]; else colnam[pos] = "F" + nc[p] + stub + diffc[0] + "." + coln[j]; } ++pos; if(regular) { for(int i = l+np; i--; ) outp[omap[i]] = FUN(column[omap[i]], column[omap[i - np]]); } else { for(int i = 0, osnp = osize + np; i != l; ++i) { // Smarter solution using while ??? if(ord2[i] < osnp && (temp = omap[ord2[i] - np])) { outp[i] = FUN(column[i], column[temp-1]); } else { outp[i] = fill; } } } if(d1 > 1) for(int k = 1; k != d1; ++k) { int final = l+np*(k+1); for(int i = 0; i != final; ++i) outp[omap[i]] = FUN(outp[omap[i]], outp[omap[i - np]]); } if(regular) for(int i = end; i != l; ++i) outp[omap[i]] = fill; if(ds > 1) { NumericVector outtemp = outp; for(int q = 1; q != ds; ++q) { int dq = diff[q], L_dq = diff[q-1], end = l+np*dq, start = l+np*L_dq; if(end <= 0) stop("abs(n * diff) needs to be < nrow(x)"); if(dq <= L_dq) stop("differences must be passed in ascending order"); for(int k = L_dq; k != dq; ++k) { int final = l+np*(k+1); for(int i = 0; i != final; ++i) outtemp[omap[i]] = FUN(outtemp[omap[i]], outtemp[omap[i - np]]); } for(int i = end; i != start; ++i) outtemp[omap[i]] = fill; out( _ , pos) = outtemp; if(names) { if(F1) colnam[pos] = stub2 + diffc[q] + "." + coln[j]; else colnam[pos] = "F"+ nc[p] + stub + diffc[q] + "." + coln[j]; } ++pos; } } } else { out( _ , pos) = column; if(names) colnam[pos] = coln[j]; ++pos; } } } } } else { // With groups if(l != g.size()) stop("nrow(x) must match length(g)"); int ags = l/ng, ngp = ng+1, maxdiff = max(diff); if(Rf_isNull(t)) { // Ordered data bool cond = !Rf_isNull(gs); IntegerVector gsv = (cond || maxdiff == 1) ? no_init_vector(1) : IntegerVector(ng); int *pgsv = cond ? INTEGER(gs)-1 : INTEGER(gsv)-1; if(maxdiff != 1) { if(cond) { if(ng != Rf_length(gs)) stop("ng must match length(gs)"); } else { for(int i = 0; i != l; ++i) ++pgsv[g[i]]; } } // int seen[ngp], memsize = sizeof(int)*(ngp); for(int j = 0; j != col; ++j) { NumericMatrix::ConstColumn column = x( _ , j); for(int p = 0; p != ns; ++p) { int np = n[p]; if(absn[p]*maxdiff > ags) warning("abs(n * diff) exceeds average group-size (%i). This could also be a result of unused factor levels. See #25. Use fdroplevels() to remove unused factor levels from your data.", ags); if(np>0) { // Positive lagged and iterated differences int d1 = diff[0]; bool L1 = np == 1; if(d1 < 1) stop("diff must be a vector of integers > 0"); NumericMatrix::Column outp = out( _ , pos); std::vector seen(ngp); // memset(seen, 0, memsize); if(names) { if(L1) colnam[pos] = stub + diffc[0] + "." + coln[j]; else colnam[pos] = "L" + nc[p] + stub + diffc[0] + "." + coln[j]; } ++pos; for(int i = 0; i != l; ++i) { if(seen[g[i]] == np) outp[i] = FUN(column[i], column[i - np]); else { outp[i] = fill; ++seen[g[i]]; } } if(d1 > 1) for(int k = 1; k != d1; ++k) { int start = np*(k+1); std::vector seen(ngp); // memset(seen, 0, memsize); for(int i = l; i--; ) { if(seen[g[i]] == pgsv[g[i]]-start) outp[i] = fill; else { outp[i] = FUN(outp[i], outp[i - np]); ++seen[g[i]]; } } } if(ds > 1) { NumericVector outtemp = outp; for(int q = 1; q != ds; ++q) { int dq = diff[q], L_dq = diff[q-1]; if(dq <= L_dq) stop("differences must be passed in ascending order"); for(int k = L_dq; k != dq; ++k) { int start = np*(k+1); std::vector seen(ngp); // memset(seen, 0, memsize); for(int i = l; i--; ) { if(seen[g[i]] == pgsv[g[i]]-start) outtemp[i] = fill; else { outtemp[i] = FUN(outtemp[i], outtemp[i - np]); ++seen[g[i]]; } } } out( _ , pos) = outtemp; if(names) { if(L1) colnam[pos] = stub + diffc[q] + "." + coln[j]; else colnam[pos] = "L" + nc[p] + stub + diffc[q] + "." + coln[j]; } ++pos; } } } else if(np<0) { // (Negative) leaded and iterated differences int d1 = diff[0]; bool F1 = np == -1; if(d1 < 1) stop("diff must be a vector of integers > 0"); NumericMatrix::Column outp = out( _ , pos); std::vector seen(ngp); // memset(seen, 0, memsize); if(names) { if(F1) colnam[pos] = stub2 + diffc[0] + "." + coln[j]; else colnam[pos] = "F" + nc[p] + stub + diffc[0] + "." + coln[j]; } ++pos; for(int i = l; i--; ) { if(seen[g[i]] == np) outp[i] = FUN(column[i], column[i - np]); else { outp[i] = fill; --seen[g[i]]; } } if(d1 > 1) for(int k = 1; k != d1; ++k) { int start = np*(k+1); std::vector seen(ngp); // memset(seen, 0, memsize); for(int i = 0; i != l; ++i) { if(seen[g[i]] == pgsv[g[i]]+start) outp[i] = fill; else { outp[i] = FUN(outp[i], outp[i - np]); ++seen[g[i]]; } } } if(ds > 1) { NumericVector outtemp = outp; for(int q = 1; q != ds; ++q) { int dq = diff[q], L_dq = diff[q-1]; if(dq <= L_dq) stop("differences must be passed in ascending order"); for(int k = L_dq; k != dq; ++k) { int start = np*(k+1); std::vector seen(ngp); // memset(seen, 0, memsize); for(int i = 0; i != l; ++i) { if(seen[g[i]] == pgsv[g[i]]+start) outtemp[i] = fill; else { outtemp[i] = FUN(outtemp[i], outtemp[i - np]); ++seen[g[i]]; } } } out( _ , pos) = outtemp; if(names) { if(F1) colnam[pos] = stub2 + diffc[q] + "." + coln[j]; else colnam[pos] = "F"+ nc[p] + stub + diffc[q] + "." + coln[j]; } ++pos; } } } else { out( _ , pos) = column; if(names) colnam[pos] = coln[j]; ++pos; } } } } else { // Unordered data: Timevar Provided IntegerVector ord = t; int temp; if(l != ord.size()) stop("length(x) must match length(t)"); IntegerVector min(ngp, INT_MAX), max(ngp, INT_MIN), cgs = no_init_vector(ngp); for(int i = 0; i != l; ++i) { temp = g[i]; if(ord[i] < min[temp]) min[temp] = ord[i]; if(ord[i] > max[temp]) max[temp] = ord[i]; } temp = 0; for(int i = 1; i != ngp; ++i) { if(min[i] == NA_INTEGER) stop("Timevar contains missing values"); if(min[i] == INT_MAX) continue; // Needed in case of unused factor levels (group vector too large) cgs[i] = temp; // This needs to b here (for unused factor levels case...) max[i] -= min[i] - 1; // need max[i] which stores the complete group sizes only if p<0 e.g. if computing leads.. temp += max[i]; } // omap provides the ordering to order the vector (needed to find previous / next values) bool regular = temp == l; IntegerVector omap(temp), ord2 = no_init_vector(l), index = no_init_vector(l); if(!regular) { // Irregular panel if(temp > 10000000 && temp > 3 * l) warning("Your panel is very irregular. Need to create an internal ordering vector of length %s to represent it.", temp); if(maxdiff > 1) stop("Iterations are currently only supported for regular panels. See ?seqid to identify the regular sequences in your panel, or just apply this function multiple times."); for(int i = 0; i != l; ++i) { ord2[i] = ord[i] - min[g[i]]; index[i] = cgs[g[i]] + ord2[i]; if(omap[index[i]]) stop("Repeated values of timevar within one or more groups"); omap[index[i]] = i+1; // needed to add 1 to distinguish between 0 and gap } } else { // Regular panel for(int i = 0; i != l; ++i) { ord2[i] = ord[i] - min[g[i]]; index[i] = cgs[g[i]] + ord2[i]; if(omap[index[i]]) stop("Repeated values of timevar within one or more groups"); omap[index[i]] = i; } } for(int j = 0; j != col; ++j) { NumericMatrix::ConstColumn column = x( _ , j); for(int p = 0; p != ns; ++p) { int np = n[p]; if(absn[p]*maxdiff > ags) warning("abs(n * diff) exceeds average group-size (%i). This could also be a result of unused factor levels. See #25. Use fdroplevels() to remove unused factor levels from your data.", ags); if(np>0) { // Positive lagged and iterated differences int d1 = diff[0]; bool L1 = np == 1; if(d1 < 1) stop("diff must be a vector of integers > 0"); NumericMatrix::Column outp = out( _ , pos); if(names) { if(L1) colnam[pos] = stub + diffc[0] + "." + coln[j]; else colnam[pos] = "L" + nc[p] + stub + diffc[0] + "." + coln[j]; } ++pos; if(regular) { for(int i = 0; i != l; ++i) { if(ord2[i] >= np) { outp[i] = FUN(column[i], column[omap[index[i]-np]]); } else { outp[i] = fill; } } } else { for(int i = 0; i != l; ++i) { if(ord2[i] >= np && (temp = omap[index[i]-np])) { outp[i] = FUN(column[i], column[temp-1]); } else { outp[i] = fill; } } } if(d1 > 1) for(int k = 1; k != d1; ++k) { int start = np*(k+1); std::vector seen(ngp); // memset(seen, 0, memsize); for(int i = l; i--; ) { if(seen[g[omap[i]]] == max[g[omap[i]]]-start) outp[omap[i]] = fill; else { outp[omap[i]] = FUN(outp[omap[i]], outp[omap[i - np]]); ++seen[g[omap[i]]]; } } } if(ds > 1) { NumericVector outtemp = outp; for(int q = 1; q != ds; ++q) { int dq = diff[q], L_dq = diff[q-1]; if(dq <= L_dq) stop("differences must be passed in ascending order"); for(int k = L_dq; k != dq; ++k) { int start = np*(k+1); std::vector seen(ngp); // memset(seen, 0, memsize); for(int i = l; i--; ) { if(seen[g[omap[i]]] == max[g[omap[i]]]-start) outtemp[omap[i]] = fill; else { outtemp[omap[i]] = FUN(outtemp[omap[i]], outtemp[omap[i - np]]); ++seen[g[omap[i]]]; } } } out( _ , pos) = outtemp; if(names) { if(L1) colnam[pos] = stub + diffc[q] + "." + coln[j]; else colnam[pos] = "L" + nc[p] + stub + diffc[q] + "." + coln[j]; } ++pos; } } } else if(np<0) { // (Negative) leaded and iterated differences int d1 = diff[0]; bool F1 = np == -1; if(d1 < 1) stop("diff must be a vector of integers > 0"); NumericMatrix::Column outp = out( _ , pos); if(names) { if(F1) colnam[pos] = stub2 + diffc[0] + "." + coln[j]; else colnam[pos] = "F" + nc[p] + stub + diffc[0] + "." + coln[j]; } ++pos; if(regular) { for(int i = 0; i != l; ++i) { if(ord2[i] < max[g[i]]+np) { outp[i] = FUN(column[i], column[omap[index[i]-np]]); } else { outp[i] = fill; } } } else { for(int i = 0; i != l; ++i) { // Smarter solution using while ??? if(ord2[i] < max[g[i]]+np && (temp = omap[index[i]-np])) { outp[i] = FUN(column[i], column[temp-1]); } else { outp[i] = fill; } } } if(d1 > 1) for(int k = 1; k != d1; ++k) { int start = np*(k+1); std::vector seen(ngp); // memset(seen, 0, memsize); for(int i = 0; i != l; ++i) { if(seen[g[omap[i]]] == max[g[omap[i]]]+start) outp[omap[i]] = fill; else { outp[omap[i]] = FUN(outp[omap[i]], outp[omap[i - np]]); ++seen[g[omap[i]]]; } } } if(ds > 1) { NumericVector outtemp = outp; for(int q = 1; q != ds; ++q) { int dq = diff[q], L_dq = diff[q-1]; if(dq <= L_dq) stop("differences must be passed in ascending order"); for(int k = L_dq; k != dq; ++k) { int start = np*(k+1); std::vector seen(ngp); // memset(seen, 0, memsize); for(int i = 0; i != l; ++i) { if(seen[g[omap[i]]] == max[g[omap[i]]]+start) outtemp[omap[i]] = fill; else { outtemp[omap[i]] = FUN(outtemp[omap[i]], outtemp[omap[i - np]]); ++seen[g[omap[i]]]; } } } out( _ , pos) = outtemp; if(names) { if(F1) colnam[pos] = stub2 + diffc[q] + "." + coln[j]; else colnam[pos] = "F"+ nc[p] + stub + diffc[q] + "." + coln[j]; } ++pos; } } } else { out( _ , pos) = column; if(names) colnam[pos] = coln[j]; ++pos; } } } } } // Previous Solution: // if(names) { // out.attr("dimnames") = List::create(rownames(x), colnam); // } else { // if(ns*ds == 1) SHALLOW_DUPLICATE_ATTRIB(out, x); // // else rownames(out) = rownames(x); // redundant !! // } SHALLOW_DUPLICATE_ATTRIB(out, x); if(ncol != col) Rf_dimgets(out, Dimension(l, ncol)); if(names) { Rf_dimnamesgets(out, List::create(rownames(x), colnam)); // colnames(out) = colnam; also deletes rownames ! } else if(ncol != col) { Rf_setAttrib(out, R_DimNamesSymbol, R_NilValue); } return out; } // [[Rcpp::export]] NumericMatrix fdiffgrowthmCpp(const NumericMatrix& x, const IntegerVector& n = 1, const IntegerVector& diff = 1, double fill = NA_REAL, int ng = 0, const IntegerVector& g = 0, const SEXP& gs = R_NilValue, const SEXP& t = R_NilValue, int ret = 1, double rho = 1, bool names = true, double power = 1) { std::string stub; if(ret < 4) { double rho2; if(ret == 3) { rho2 = 1; if(power != 1) stop("High-powered log-difference growth rates are currently not supported"); if(names) stub = "Dlog"; } else { rho2 = rho; if(names) stub = (ret == 1 && rho == 1) ? "D" : (ret == 1) ? "QD" : (rho == 1) ? "Dlog" : "QDlog"; // QD for quasi-differences } return fdiffgrowthmCppImpl(x, n, diff, fill, ng, g, gs, t, stub, names, [rho2](double y, double x) { return y-rho2*x; }); // return y-x; same efficiency as return y-rho*x; when rho = 1 -> smart compiler !, and reduced file size !! } else if (ret == 4) { if(names) stub = "G"; if(power == 1) return fdiffgrowthmCppImpl(x, n, diff, fill, ng, g, gs, t, stub, names, [rho](double y, double x) { return (y-x)*(rho/x); }); // same speed as fixing 100 ! Faster using (y/x-1)*rho or (x*(1/x)-1)*rho ? return fdiffgrowthmCppImpl(x, n, diff, fill, ng, g, gs, t, stub, names, [rho, power](double y, double x) { return (pow(y/x, power)-1)*rho; }); } else stop("Unknown return option!"); } template List fdiffgrowthlCppImpl(const List& x, const IntegerVector& n = 1, const IntegerVector& diff = 1, double fill = NA_REAL, int ng = 0, const IntegerVector& g = 0, const SEXP& gs = R_NilValue, const SEXP& t = R_NilValue, std::string stub = "", bool names = true, F FUN = [](double y, double x) { return y-x; }) { // const needed for #if response... int l = x.size(), ns = n.size(), ds = diff.size(), zeros = 0, pos = INT_MAX; IntegerVector absn = no_init_vector(ns); for(int i = ns; i--; ) { if(n[i] == pos) stop("duplicated values in n detected"); // because one might mistakenly pass a factor to the n-slot pos = n[i]; if(pos == 0) ++zeros; if(pos < 0) { if(pos == NA_INTEGER) stop("NA in n"); absn[i] = -pos; } else absn[i] = pos; } pos = 0; std::string stub2 = names ? "F" + stub : ""; int ncol = ((ns-zeros)*ds+zeros)*l; List out(ncol); CharacterVector nam = names ? no_init_vector(ncol) : no_init_vector(1); CharacterVector nc = names ? Rf_coerceVector(absn, STRSXP) : NA_STRING; CharacterVector diffc = names ? Rf_coerceVector(diff, STRSXP) : NA_STRING; CharacterVector na = names ? coln_check(Rf_getAttrib(x, R_NamesSymbol)) : NA_STRING; if(names && na[0] == NA_STRING) names = false; if(ng == 0) { // No groups if(Rf_isNull(t)) { // Ordered data for(int j = 0; j != l; ++j) { NumericVector column = x[j]; int row = column.size(); for(int p = 0; p != ns; ++p) { int np = n[p]; if(np>0) { // Positive lagged and iterated differences int d1 = diff[0], end = np*d1; bool L1 = np == 1; if(d1 < 1) stop("diff must be a vector of integers > 0"); if(end >= row) stop("n * diff needs to be < nrow(x)"); NumericVector outjp = no_init_vector(row); if(names) { if(L1) nam[pos] = stub + diffc[0] + "." + na[j]; else nam[pos] = "L" + nc[p] + stub + diffc[0] + "." + na[j]; } for(int i = np; i != row; ++i) outjp[i] = FUN(column[i], column[i - np]); if(d1 > 1) for(int k = 1; k != d1; ++k) { int start = np*(k+1)-1; for(int i = row-1; i != start; --i) outjp[i] = FUN(outjp[i], outjp[i - np]); } for(int i = end; i--; ) outjp[i] = fill; SHALLOW_DUPLICATE_ATTRIB(outjp, column); out[pos++] = outjp; if(ds > 1) { NumericVector outtemp = Rf_shallow_duplicate(outjp); for(int q = 1; q != ds; ++q) { int dq = diff[q], L_dq = diff[q-1], end = np*dq; if(end >= row) stop("n * diff needs to be < nrow(x)"); if(dq <= L_dq) stop("differences must be passed in ascending order"); for(int k = L_dq; k != dq; ++k) { int start = np*(k+1)-1; for(int i = row-1; i != start; --i) outtemp[i] = FUN(outtemp[i], outtemp[i - np]); } for(int i = np*L_dq; i != end; ++i) outtemp[i] = fill; if(names) { if(L1) nam[pos] = stub + diffc[q] + "." + na[j]; else nam[pos] = "L" + nc[p] + stub + diffc[q] + "." + na[j]; } out[pos++] = Rf_shallow_duplicate(outtemp); // or Rf_copyVector, Rf_shallow_duplicate, Rf_lazy_duplicate http://mtweb.cs.ucl.ac.uk/mus/bin/install_R/R-3.1.1/src/main/duplicate.c } // https://rlang.r-lib.org/reference/duplicate.html } } else if(np<0) { // (Negative) leaded and iterated differences int d1 = diff[0], end = row+np*d1; bool F1 = np == -1; if(d1 < 1) stop("diff must be a vector of integers > 0"); if(end <= 0) stop("abs(n * diff) needs to be < nrow(x)"); NumericVector outjp = no_init_vector(row); SHALLOW_DUPLICATE_ATTRIB(outjp, column); if(names) { if(F1) nam[pos] = stub2 + diffc[0] + "." + na[j]; else nam[pos] = "F" + nc[p] + stub + diffc[0] + "." + na[j]; } for(int i = row+np; i--; ) outjp[i] = FUN(column[i], column[i - np]); if(d1 > 1) for(int k = 1; k != d1; ++k) { int final = row+np*(k+1); for(int i = 0; i != final; ++i) outjp[i] = FUN(outjp[i], outjp[i - np]); } for(int i = end; i != row; ++i) outjp[i] = fill; out[pos++] = outjp; if(ds > 1) { NumericVector outtemp = Rf_shallow_duplicate(outjp); for(int q = 1; q != ds; ++q) { int dq = diff[q], L_dq = diff[q-1], end = row+np*dq, start = row+np*L_dq; if(end <= 0) stop("abs(n * diff) needs to be < nrow(x)"); if(dq <= L_dq) stop("differences must be passed in ascending order"); for(int k = L_dq; k != dq; ++k) { int final = row+np*(k+1); for(int i = 0; i != final; ++i) outtemp[i] = FUN(outtemp[i], outtemp[i - np]); } for(int i = end; i != start; ++i) outtemp[i] = fill; if(names) { if(F1) nam[pos] = stub2 + diffc[q] + "." + na[j]; else nam[pos] = "F"+ nc[p] + stub + diffc[q] + "." + na[j]; } out[pos++] = Rf_shallow_duplicate(outtemp); } } } else { if(names) nam[pos] = na[j]; out[pos++] = column; } } } } else { // Unordered data: Timevar provided IntegerVector ord = t; int min = INT_MAX, max = INT_MIN, osize, temp, os = ord.size(); if(Rf_length(x[0]) != os) stop("length(x) must match length(t)"); for(int i = 0; i != os; ++i) { if(ord[i] < min) min = ord[i]; if(ord[i] > max) max = ord[i]; } if(min == NA_INTEGER) stop("Timevar contains missing values"); osize = max-min+1; bool regular = osize == os; IntegerVector omap(osize), ord2 = regular ? no_init_vector(1) : no_init_vector(os); if(!regular) { // Irregular time series if(osize > 10000000 && osize > 3 * os) warning("Your time series is very irregular. Need to create an internal ordering vector of length %s to represent it.", osize); if(Rcpp::max(diff) > 1) stop("Iterations are currently only supported for regular time series. See ?seqid to identify the regular sequences in your time series, or just apply this function multiple times."); for(int i = 0; i != os; ++i) { temp = ord[i] - min; // Best ? Or direct assign to ord2[i] ? Also check for panel version.. if(omap[temp]) stop("Repeated values in timevar"); omap[temp] = i+1; ord2[i] = temp; } } else { // Regular time series for(int i = 0; i != os; ++i) { temp = ord[i] - min; if(omap[temp]) stop("Repeated values in timevar"); omap[temp] = i; } } for(int j = 0; j != l; ++j) { NumericVector column = x[j]; if(os != column.size()) stop("nrow(x) must match length(t)"); for(int p = 0; p != ns; ++p) { int np = n[p]; if(np>0) { // Positive lagged and iterated differences int d1 = diff[0], end = np*d1; bool L1 = np == 1; if(d1 < 1) stop("diff must be a vector of integers > 0"); if(end >= os) stop("n * diff needs to be < nrow(x)"); NumericVector outjp = no_init_vector(os); SHALLOW_DUPLICATE_ATTRIB(outjp, column); if(names) { if(L1) nam[pos] = stub + diffc[0] + "." + na[j]; else nam[pos] = "L" + nc[p] + stub + diffc[0] + "." + na[j]; } if(regular) { for(int i = np; i != os; ++i) outjp[omap[i]] = FUN(column[omap[i]], column[omap[i - np]]); } else { for(int i = 0; i != os; ++i) { // Smarter solution using while ??? if(ord2[i] >= np && (temp = omap[ord2[i] - np])) { outjp[i] = FUN(column[i], column[temp-1]); } else { outjp[i] = fill; } } } if(d1 > 1) for(int k = 1; k != d1; ++k) { int start = np*(k+1)-1; for(int i = os-1; i != start; --i) outjp[omap[i]] = FUN(outjp[omap[i]], outjp[omap[i - np]]); } if(regular) for(int i = end; i--; ) outjp[omap[i]] = fill; out[pos++] = outjp; if(ds > 1) { NumericVector outtemp = Rf_shallow_duplicate(outjp); for(int q = 1; q != ds; ++q) { int dq = diff[q], L_dq = diff[q-1], end = np*dq; if(end >= os) stop("n * diff needs to be < nrow(x)"); if(dq <= L_dq) stop("differences must be passed in ascending order"); for(int k = L_dq; k != dq; ++k) { int start = np*(k+1)-1; for(int i = os-1; i != start; --i) outtemp[omap[i]] = FUN(outtemp[omap[i]], outtemp[omap[i - np]]); } for(int i = np*L_dq; i != end; ++i) outtemp[omap[i]] = fill; if(names) { if(L1) nam[pos] = stub + diffc[q] + "." + na[j]; else nam[pos] = "L" + nc[p] + stub + diffc[q] + "." + na[j]; } out[pos++] = Rf_shallow_duplicate(outtemp); } } } else if(np<0) { // (Negative) leaded and iterated differences int d1 = diff[0], end = os+np*d1; bool F1 = np == -1; if(d1 < 1) stop("diff must be a vector of integers > 0"); if(end <= 0) stop("abs(n * diff) needs to be < nrow(x)"); NumericVector outjp = no_init_vector(os); SHALLOW_DUPLICATE_ATTRIB(outjp, column); if(names) { if(F1) nam[pos] = stub2 + diffc[0] + "." + na[j]; else nam[pos] = "F" + nc[p] + stub + diffc[0] + "." + na[j]; } if(regular) { for(int i = os+np; i--; ) outjp[omap[i]] = FUN(column[omap[i]], column[omap[i - np]]); } else { for(int i = 0, osnp = osize + np; i != os; ++i) { // Smarter solution using while ??? if(ord2[i] < osnp && (temp = omap[ord2[i] - np])) { outjp[i] = FUN(column[i], column[temp-1]); } else { outjp[i] = fill; } } } if(d1 > 1) for(int k = 1; k != d1; ++k) { int final = os+np*(k+1); for(int i = 0; i != final; ++i) outjp[omap[i]] = FUN(outjp[omap[i]], outjp[omap[i - np]]); } if(regular) for(int i = end; i != os; ++i) outjp[omap[i]] = fill; out[pos++] = outjp; if(ds > 1) { NumericVector outtemp = Rf_shallow_duplicate(outjp); for(int q = 1; q != ds; ++q) { int dq = diff[q], L_dq = diff[q-1], end = os+np*dq, start = os+np*L_dq; if(end <= 0) stop("abs(n * diff) needs to be < nrow(x)"); if(dq <= L_dq) stop("differences must be passed in ascending order"); for(int k = L_dq; k != dq; ++k) { int final = os+np*(k+1); for(int i = 0; i != final; ++i) outtemp[omap[i]] = FUN(outtemp[omap[i]], outtemp[omap[i - np]]); } for(int i = end; i != start; ++i) outtemp[omap[i]] = fill; if(names) { if(F1) nam[pos] = stub2 + diffc[q] + "." + na[j]; else nam[pos] = "F"+ nc[p] + stub + diffc[q] + "." + na[j]; } out[pos++] = Rf_shallow_duplicate(outtemp); } } } else { if(names) nam[pos] = na[j]; out[pos++] = column; } } } } } else { // With groups int gss = g.size(), ags = gss/ng, ngp = ng+1, maxdiff = max(diff); if(Rf_isNull(t)) { // Ordered data bool cond = !Rf_isNull(gs); IntegerVector gsv = (cond || maxdiff == 1) ? no_init_vector(1) : IntegerVector(ng); int *pgsv = cond ? INTEGER(gs)-1 : INTEGER(gsv)-1; if(maxdiff != 1) { if(cond) { if(ng != Rf_length(gs)) stop("ng must match length(gs)"); } else { for(int i = 0; i != gss; ++i) ++pgsv[g[i]]; } } // int seen[ngp], memsize = sizeof(int)*(ngp); for(int j = 0; j != l; ++j) { NumericVector column = x[j]; if(gss != column.size()) stop("nrow(x) must match length(g)"); for(int p = 0; p != ns; ++p) { int np = n[p]; if(absn[p]*maxdiff > ags) warning("abs(n * diff) exceeds average group-size (%i). This could also be a result of unused factor levels. See #25. Use fdroplevels() to remove unused factor levels from your data.", ags); if(np>0) { // Positive lagged and iterated differences int d1 = diff[0]; bool L1 = np == 1; if(d1 < 1) stop("diff must be a vector of integers > 0"); NumericVector outjp = no_init_vector(gss); SHALLOW_DUPLICATE_ATTRIB(outjp, column); std::vector seen(ngp); // memset(seen, 0, memsize); if(names) { if(L1) nam[pos] = stub + diffc[0] + "." + na[j]; else nam[pos] = "L" + nc[p] + stub + diffc[0] + "." + na[j]; } for(int i = 0; i != gss; ++i) { if(seen[g[i]] == np) outjp[i] = FUN(column[i], column[i - np]); else { outjp[i] = fill; ++seen[g[i]]; } } if(d1 > 1) for(int k = 1; k != d1; ++k) { int start = np*(k+1); std::vector seen(ngp); // memset(seen, 0, memsize); for(int i = gss; i--; ) { if(seen[g[i]] == pgsv[g[i]]-start) outjp[i] = fill; else { outjp[i] = FUN(outjp[i], outjp[i - np]); ++seen[g[i]]; } } } out[pos++] = outjp; if(ds > 1) { NumericVector outtemp = Rf_shallow_duplicate(outjp); for(int q = 1; q != ds; ++q) { int dq = diff[q], L_dq = diff[q-1]; if(dq <= L_dq) stop("differences must be passed in ascending order"); for(int k = L_dq; k != dq; ++k) { int start = np*(k+1); std::vector seen(ngp); // memset(seen, 0, memsize); for(int i = gss; i--; ) { if(seen[g[i]] == pgsv[g[i]]-start) outtemp[i] = fill; else { outtemp[i] = FUN(outtemp[i], outtemp[i - np]); ++seen[g[i]]; } } } if(names) { if(L1) nam[pos] = stub + diffc[q] + "." + na[j]; else nam[pos] = "L" + nc[p] + stub + diffc[q] + "." + na[j]; } out[pos++] = Rf_shallow_duplicate(outtemp); } } } else if(np<0) { // (Negative) leaded and iterated differences int d1 = diff[0]; bool F1 = np == -1; if(d1 < 1) stop("diff must be a vector of integers > 0"); NumericVector outjp = no_init_vector(gss); SHALLOW_DUPLICATE_ATTRIB(outjp, column); std::vector seen(ngp); // memset(seen, 0, memsize); if(names) { if(F1) nam[pos] = stub2 + diffc[0] + "." + na[j]; else nam[pos] = "F" + nc[p] + stub + diffc[0] + "." + na[j]; } for(int i = gss; i--; ) { if(seen[g[i]] == np) outjp[i] = FUN(column[i], column[i - np]); else { outjp[i] = fill; --seen[g[i]]; } } if(d1 > 1) for(int k = 1; k != d1; ++k) { int start = np*(k+1); std::vector seen(ngp); // memset(seen, 0, memsize); for(int i = 0; i != gss; ++i) { if(seen[g[i]] == pgsv[g[i]]+start) outjp[i] = fill; else { outjp[i] = FUN(outjp[i], outjp[i - np]); ++seen[g[i]]; } } } out[pos++] = outjp; if(ds > 1) { NumericVector outtemp = Rf_shallow_duplicate(outjp); for(int q = 1; q != ds; ++q) { int dq = diff[q], L_dq = diff[q-1]; if(dq <= L_dq) stop("differences must be passed in ascending order"); for(int k = L_dq; k != dq; ++k) { int start = np*(k+1); std::vector seen(ngp); // memset(seen, 0, memsize); for(int i = 0; i != gss; ++i) { if(seen[g[i]] == pgsv[g[i]]+start) outtemp[i] = fill; else { outtemp[i] = FUN(outtemp[i], outtemp[i - np]); ++seen[g[i]]; } } } if(names) { if(F1) nam[pos] = stub2 + diffc[q] + "." + na[j]; else nam[pos] = "F"+ nc[p] + stub + diffc[q] + "." + na[j]; } out[pos++] = Rf_shallow_duplicate(outtemp); } } } else { if(names) nam[pos] = na[j]; out[pos++] = column; } } } } else { // Unordered data: Timevar Provided IntegerVector ord = t; int temp; if(gss != ord.size()) stop("length(x) must match length(t)"); IntegerVector min(ngp, INT_MAX), max(ngp, INT_MIN), cgs = no_init_vector(ngp); for(int i = 0; i != gss; ++i) { temp = g[i]; if(ord[i] < min[temp]) min[temp] = ord[i]; if(ord[i] > max[temp]) max[temp] = ord[i]; } temp = 0; for(int i = 1; i != ngp; ++i) { if(min[i] == NA_INTEGER) stop("Timevar contains missing values"); if(min[i] == INT_MAX) continue; // Needed in case of unused factor levels (group vector too large) cgs[i] = temp; // This needs to b here (for unused factor levels case...) max[i] -= min[i] - 1; // need max[i] which stores the complete group sizes only if p<0 e.g. if computing leads.. temp += max[i]; } // omap provides the ordering to order the vector (needed to find previous / next values) bool regular = temp == gss; IntegerVector omap(temp), ord2 = no_init_vector(gss), index = no_init_vector(gss); if(!regular) { // Irregular panel if(temp > 10000000 && temp > 3 * gss) warning("Your panel is very irregular. Need to create an internal ordering vector of length %s to represent it.", temp); if(maxdiff > 1) stop("Iterations are currently only supported for regular panels. See ?seqid to identify the regular sequences in your panel, or just apply this function multiple times."); for(int i = 0; i != gss; ++i) { ord2[i] = ord[i] - min[g[i]]; index[i] = cgs[g[i]] + ord2[i]; if(omap[index[i]]) stop("Repeated values of timevar within one or more groups"); omap[index[i]] = i+1; // needed to add 1 to distinguish between 0 and gap } } else { // Regular panel for(int i = 0; i != gss; ++i) { ord2[i] = ord[i] - min[g[i]]; index[i] = cgs[g[i]] + ord2[i]; if(omap[index[i]]) stop("Repeated values of timevar within one or more groups"); omap[index[i]] = i; } } for(int j = 0; j != l; ++j) { NumericVector column = x[j]; if(gss != column.size()) stop("nrow(x) must match length(g)"); for(int p = 0; p != ns; ++p) { int np = n[p]; if(absn[p]*maxdiff > ags) warning("abs(n * diff) exceeds average group-size (%i). This could also be a result of unused factor levels. See #25. Use fdroplevels() to remove unused factor levels from your data.", ags); if(np>0) { // Positive lagged and iterated differences int d1 = diff[0]; bool L1 = np == 1; if(d1 < 1) stop("diff must be a vector of integers > 0"); NumericVector outjp = no_init_vector(gss); SHALLOW_DUPLICATE_ATTRIB(outjp, column); if(names) { if(L1) nam[pos] = stub + diffc[0] + "." + na[j]; else nam[pos] = "L" + nc[p] + stub + diffc[0] + "." + na[j]; } if(regular) { for(int i = 0; i != gss; ++i) { if(ord2[i] >= np) { outjp[i] = FUN(column[i], column[omap[index[i]-np]]); } else { outjp[i] = fill; } } } else { for(int i = 0; i != gss; ++i) { if(ord2[i] >= np && (temp = omap[index[i]-np])) { outjp[i] = FUN(column[i], column[temp-1]); } else { outjp[i] = fill; } } } if(d1 > 1) for(int k = 1; k != d1; ++k) { int start = np*(k+1); std::vector seen(ngp); // memset(seen, 0, memsize); for(int i = gss; i--; ) { if(seen[g[omap[i]]] == max[g[omap[i]]]-start) outjp[omap[i]] = fill; else { outjp[omap[i]] = FUN(outjp[omap[i]], outjp[omap[i - np]]); ++seen[g[omap[i]]]; } } } out[pos++] = outjp; if(ds > 1) { NumericVector outtemp = Rf_shallow_duplicate(outjp); for(int q = 1; q != ds; ++q) { int dq = diff[q], L_dq = diff[q-1]; if(dq <= L_dq) stop("differences must be passed in ascending order"); for(int k = L_dq; k != dq; ++k) { int start = np*(k+1); std::vector seen(ngp); // memset(seen, 0, memsize); for(int i = gss; i--; ) { if(seen[g[omap[i]]] == max[g[omap[i]]]-start) outtemp[omap[i]] = fill; else { outtemp[omap[i]] = FUN(outtemp[omap[i]], outtemp[omap[i - np]]); ++seen[g[omap[i]]]; } } } if(names) { if(L1) nam[pos] = stub + diffc[q] + "." + na[j]; else nam[pos] = "L" + nc[p] + stub + diffc[q] + "." + na[j]; } out[pos++] = Rf_shallow_duplicate(outtemp); } } } else if(np<0) { // (Negative) leaded and iterated differences int d1 = diff[0]; bool F1 = np == -1; if(d1 < 1) stop("diff must be a vector of integers > 0"); NumericVector outjp = no_init_vector(gss); SHALLOW_DUPLICATE_ATTRIB(outjp, column); if(names) { if(F1) nam[pos] = stub2 + diffc[0] + "." + na[j]; else nam[pos] = "F" + nc[p] + stub + diffc[0] + "." + na[j]; } if(regular) { for(int i = 0; i != gss; ++i) { if(ord2[i] < max[g[i]]+np) { outjp[i] = FUN(column[i], column[omap[index[i]-np]]); } else { outjp[i] = fill; } } } else { for(int i = 0; i != gss; ++i) { // Smarter solution using while ??? if(ord2[i] < max[g[i]]+np && (temp = omap[index[i]-np])) { outjp[i] = FUN(column[i], column[temp-1]); } else { outjp[i] = fill; } } } if(d1 > 1) for(int k = 1; k != d1; ++k) { int start = np*(k+1); std::vector seen(ngp); // memset(seen, 0, memsize); for(int i = 0; i != gss; ++i) { if(seen[g[omap[i]]] == max[g[omap[i]]]+start) outjp[omap[i]] = fill; else { outjp[omap[i]] = FUN(outjp[omap[i]], outjp[omap[i - np]]); ++seen[g[omap[i]]]; } } } out[pos++] = outjp; if(ds > 1) { NumericVector outtemp = Rf_shallow_duplicate(outjp); for(int q = 1; q != ds; ++q) { int dq = diff[q], L_dq = diff[q-1]; if(dq <= L_dq) stop("differences must be passed in ascending order"); for(int k = L_dq; k != dq; ++k) { int start = np*(k+1); std::vector seen(ngp); // memset(seen, 0, memsize); for(int i = 0; i != gss; ++i) { if(seen[g[omap[i]]] == max[g[omap[i]]]+start) outtemp[omap[i]] = fill; else { outtemp[omap[i]] = FUN(outtemp[omap[i]], outtemp[omap[i - np]]); ++seen[g[omap[i]]]; } } } if(names) { if(F1) nam[pos] = stub2 + diffc[q] + "." + na[j]; else nam[pos] = "F"+ nc[p] + stub + diffc[q] + "." + na[j]; } out[pos++] = Rf_shallow_duplicate(outtemp); } } } else { if(names) nam[pos] = na[j]; out[pos++] = column; } } } } } SHALLOW_DUPLICATE_ATTRIB(out, x); if(names) { // best way to code this ? Rf_namesgets(out, nam); } else if(ncol != l) { Rf_setAttrib(out, R_NamesSymbol, R_NilValue); } return out; } // [[Rcpp::export]] List fdiffgrowthlCpp(const List& x, const IntegerVector& n = 1, const IntegerVector& diff = 1, double fill = NA_REAL, int ng = 0, const IntegerVector& g = 0, const SEXP& gs = R_NilValue, const SEXP& t = R_NilValue, int ret = 1, double rho = 1, bool names = true, double power = 1) { std::string stub; if(ret < 4) { double rho2; if(ret == 3) { rho2 = 1; if(power != 1) stop("High-powered log-difference growth rates are currently not supported"); if(names) stub = "Dlog"; } else { rho2 = rho; if(names) stub = (ret == 1 && rho == 1) ? "D" : (ret == 1) ? "QD" : (rho == 1) ? "Dlog" : "QDlog"; // QD for quasi-differences } return fdiffgrowthlCppImpl(x, n, diff, fill, ng, g, gs, t, stub, names, [rho2](double y, double x) { return y-rho2*x; }); // return y-x; same efficiency as return y-rho*x; when rho = 1 -> smart compiler !, and reduced file size !! } else if (ret == 4) { if(names) stub = "G"; if(power == 1) return fdiffgrowthlCppImpl(x, n, diff, fill, ng, g, gs, t, stub, names, [rho](double y, double x) { return (y-x)*(rho/x); }); // same speed as fixing 100 ! Faster using (y/x-1)*rho or (x*(1/x)-1)*rho ? return fdiffgrowthlCppImpl(x, n, diff, fill, ng, g, gs, t, stub, names, [rho, power](double y, double x) { return (pow(y/x, power)-1)*rho; }); } else stop("Unknown return option!"); } // Old attempts without template .... // #define FUN(y, x) (ret == 1 && rho1) ? ((y)-(x)) : // (ret == 1) ? ((y)-rho*(x)) : // (ret == 2 && rho1) ? (log((y)*(1/(x)))) : // (ret == 2) ? (log((y)*(1/(rho*(x))))) : // (ret == 3) ? (((y)-(x))*(100/(x))) : (log((y)*(1/(x)))*100) // #define rho1 (rho == 1) // #define retm (ret) // // #if retm == 1 && rho1 // #define FUN(y, x) ((y)-(x)) // #elif retm == 1 // #define FUN(y, x) ((y)-rho*(x)) // #elif retm == 2 && rho1 // #define FUN(y, x) (log((y)*(1/(x)))) // #elif retm == 2 // #define FUN(y, x) (log((y)*(1/(rho*(x))))) // #elif retm == 3 // #define FUN(y, x) (((y)-(x))*(100/(x))) // #elif retm == 4 // #define FUN(y, x) (log((y)*(1/(x)))*100) // #endif // Previous: Internally computing log-differences--- compiled file was 648 kb, without debug info !! // // [[Rcpp::export]] // NumericVector fdiffgrowthCpp(const NumericVector& x, const IntegerVector& n = 1, const IntegerVector& diff = 1, // double fill = NA_REAL, int ng = 0, const IntegerVector& g = 0, // const SEXP& gs = R_NilValue, const SEXP& t = R_NilValue, // int ret = 1, double rho = 1, bool names = true) { // // std::string stub; // switch (ret) // { // [rho] or [&rho] ? // https://stackoverflow.com/questions/30217956/error-variable-cannot-be-implicitly-captured-because-no-default-capture-mode-h // case 1: // if(names) stub = (rho == 1) ? "D" : "QD"; // QD for quasi-differences ! // return fdiffgrowthCppImpl(x, n, diff, fill, ng, g, gs, t, stub, names, [rho](double y, double x) { return y-rho*x; }); // return y-x; same efficiency as return y-rho*x; when rho = 1 -> smart compiler !, and reduced file size !! // case 2: // if(rho == 1) goto fastld; // if(names) stub = "QDlog"; // return fdiffgrowthCppImpl(x, n, diff, fill, ng, g, gs, t, stub, names, [rho](double y, double x) { return R_log(y)-rho*R_log(x); }); // log(y*(1/(rho*x))) gives log(y) - log(rho*x), but we want log(y) - rho*log(x) // case 3: // if(names) stub = "G"; // return fdiffgrowthCppImpl(x, n, diff, fill, ng, g, gs, t, stub, names, [rho](double y, double x) { return (y-x)*(rho/x); }); // same speed as fixing 100 ! Faster using (y/x-1)*rho or (x*(1/x)-1)*rho ? // case 4: // fastld: // if(names) stub = "Dlog"; // return fdiffgrowthCppImpl(x, n, diff, fill, ng, g, gs, t, stub, names, [rho](double y, double x) { return rho*R_log(y*(1/x)); }); // default: stop("Unknown return option!"); // } // } collapse/src/Makevars.win0000644000176200001440000000002314172374744015143 0ustar liggesusers PKG_CFLAGS += -O3 collapse/src/fprod.cpp0000644000176200001440000003467114174223734014503 0ustar liggesusers#include using namespace Rcpp; // Note: For weighted computations the model code is fmean.cpp. // [[Rcpp::export]] NumericVector fprodCpp(const NumericVector& x, int ng = 0, const IntegerVector& g = 0, const SEXP& w = R_NilValue, bool narm = true) { int l = x.size(); if(l < 1) return x; // Prevents seqfault for numeric(0) #101 if(Rf_isNull(w)) { // No weights if(ng == 0) { if(narm) { int j = l-1; long double prod = x[j]; while(std::isnan(prod) && j!=0) prod = x[--j]; if(j != 0) for(int i = j; i--; ) { if(!std::isnan(x[i])) prod *= x[i]; // Fastest ? } return Rf_ScalarReal((double)prod); } else { long double prod = 1; for(int i = 0; i != l; ++i) { if(std::isnan(x[i])) { prod = x[i]; break; } else { prod *= x[i]; } } return Rf_ScalarReal((double)prod); } } else { // with groups if(g.size() != l) stop("length(g) must match nrow(X)"); if(narm) { NumericVector prod(ng, NA_REAL); // Other way ? for(int i = l; i--; ) { if(!std::isnan(x[i])) { // faster way to code this ? -> Not Bad at all if(std::isnan(prod[g[i]-1])) prod[g[i]-1] = x[i]; else prod[g[i]-1] *= x[i]; } } if(!Rf_isObject(x)) Rf_copyMostAttrib(x, prod); return prod; } else { NumericVector prod(ng, 1.0); // good? -> yes int ngs = 0; for(int i = 0; i != l; ++i) { if(std::isnan(x[i])) { if(!std::isnan(prod[g[i]-1])) { prod[g[i]-1] = x[i]; ++ngs; if(ngs == ng) break; } } else { prod[g[i]-1] *= x[i]; } } if(!Rf_isObject(x)) Rf_copyMostAttrib(x, prod); return prod; } } } else { // With weights NumericVector wg = w; if(l != wg.size()) stop("length(w) must match length(x)"); if(ng == 0) { if(narm) { int j = l-1; while((std::isnan(x[j]) || std::isnan(wg[j])) && j!=0) --j; long double prod = x[j]*wg[j]; if(j != 0) for(int i = j; i--; ) { if(std::isnan(x[i]) || std::isnan(wg[i])) continue; prod *= x[i]*wg[i]; } return Rf_ScalarReal((double)prod); } else { long double prod = 1; for(int i = 0; i != l; ++i) { if(std::isnan(x[i]) || std::isnan(wg[i])) { prod = x[i]+wg[i]; break; } else { prod *= x[i]*wg[i]; } } return Rf_ScalarReal((double)prod); } } else { // with groups if(g.size() != l) stop("length(g) must match nrow(X)"); if(narm) { NumericVector prod(ng, NA_REAL); for(int i = l; i--; ) { if(std::isnan(x[i]) || std::isnan(wg[i])) continue; if(std::isnan(prod[g[i]-1])) prod[g[i]-1] = x[i]*wg[i]; else prod[g[i]-1] *= x[i]*wg[i]; } if(!Rf_isObject(x)) Rf_copyMostAttrib(x, prod); return prod; } else { NumericVector prod(ng, 1.0); int ngs = 0; for(int i = 0; i != l; ++i) { if(std::isnan(x[i]) || std::isnan(wg[i])) { if(!std::isnan(prod[g[i]-1])) { prod[g[i]-1] = x[i]+wg[i]; ++ngs; if(ngs == ng) break; } } else { prod[g[i]-1] *= x[i]*wg[i]; } } if(!Rf_isObject(x)) Rf_copyMostAttrib(x, prod); return prod; } } } } // [[Rcpp::export]] SEXP fprodmCpp(const NumericMatrix& x, int ng = 0, const IntegerVector& g = 0, const SEXP& w = R_NilValue, bool narm = true, bool drop = true) { int l = x.nrow(), col = x.ncol(); if(Rf_isNull(w)) { // No weights if(ng == 0) { NumericVector prod = no_init_vector(col); // Initialize faster -> Nope if(narm) { for(int j = col; j--; ) { NumericMatrix::ConstColumn column = x( _ , j); int k = l-1; long double prodj = column[k]; while(std::isnan(prodj) && k!=0) prodj = column[--k]; if(k != 0) for(int i = k; i--; ) { if(!std::isnan(column[i])) prodj *= column[i]; } prod[j] = (double)prodj; } } else { for(int j = col; j--; ) { NumericMatrix::ConstColumn column = x( _ , j); long double prodj = 1; for(int i = 0; i != l; ++i) { if(std::isnan(column[i])) { prodj = column[i]; break; } else { prodj *= column[i]; } } prod[j] = (double)prodj; } } if(drop) Rf_setAttrib(prod, R_NamesSymbol, colnames(x)); else { Rf_dimgets(prod, Dimension(1, col)); colnames(prod) = colnames(x); if(!Rf_isObject(x)) Rf_copyMostAttrib(x, prod); } return prod; } else { // with groups if(g.size() != l) stop("length(g) must match nrow(X)"); if(narm) { NumericMatrix prod = no_init_matrix(ng, col); std::fill(prod.begin(), prod.end(), NA_REAL); for(int j = col; j--; ) { NumericMatrix::ConstColumn column = x( _ , j); NumericMatrix::Column prodj = prod( _ , j); for(int i = l; i--; ) { if(!std::isnan(column[i])) { if(std::isnan(prodj[g[i]-1])) prodj[g[i]-1] = column[i]; else prodj[g[i]-1] *= column[i]; } } } colnames(prod) = colnames(x); if(!Rf_isObject(x)) Rf_copyMostAttrib(x, prod); return prod; } else { NumericMatrix prod = no_init_matrix(ng, col); // no init numerically unstable std::fill(prod.begin(), prod.end(), 1.0); for(int j = col; j--; ) { NumericMatrix::ConstColumn column = x( _ , j); NumericMatrix::Column prodj = prod( _ , j); int ngs = 0; for(int i = 0; i != l; ++i) { if(std::isnan(column[i])) { if(!std::isnan(prodj[g[i]-1])) { prodj[g[i]-1] = column[i]; ++ngs; if(ngs == ng) break; } } else { prodj[g[i]-1] *= column[i]; } } } colnames(prod) = colnames(x); if(!Rf_isObject(x)) Rf_copyMostAttrib(x, prod); return prod; } } } else { // With weights NumericVector wg = w; if(l != wg.size()) stop("length(w) must match nrow(X)"); if(ng == 0) { NumericVector prod = no_init_vector(col); if(narm) { for(int j = col; j--; ) { NumericMatrix::ConstColumn column = x( _ , j); int k = l-1; while((std::isnan(column[k]) || std::isnan(wg[k])) && k!=0) --k; long double prodj = column[k]*wg[k]; if(k != 0) for(int i = k; i--; ) { if(std::isnan(column[i]) || std::isnan(wg[i])) continue; prodj *= column[i]*wg[i]; } prod[j] = (double)prodj; } } else { for(int j = col; j--; ) { NumericMatrix::ConstColumn column = x( _ , j); long double prodj = 1; for(int i = 0; i != l; ++i) { if(std::isnan(column[i]) || std::isnan(wg[i])) { prodj = column[i]+wg[i]; break; } else { prodj *= column[i]*wg[i]; } } prod[j] = (double)prodj; } } if(drop) Rf_setAttrib(prod, R_NamesSymbol, colnames(x)); else { Rf_dimgets(prod, Dimension(1, col)); colnames(prod) = colnames(x); if(!Rf_isObject(x)) Rf_copyMostAttrib(x, prod); } return prod; } else { // with groups if(g.size() != l) stop("length(g) must match nrow(X)"); if(narm) { NumericMatrix prod = no_init_matrix(ng, col); std::fill(prod.begin(), prod.end(), NA_REAL); for(int j = col; j--; ) { NumericMatrix::ConstColumn column = x( _ , j); NumericMatrix::Column prodj = prod( _ , j); for(int i = l; i--; ) { if(std::isnan(column[i]) || std::isnan(wg[i])) continue; if(std::isnan(prodj[g[i]-1])) { prodj[g[i]-1] = column[i]*wg[i]; } else { prodj[g[i]-1] *= column[i]*wg[i]; } } } colnames(prod) = colnames(x); if(!Rf_isObject(x)) Rf_copyMostAttrib(x, prod); return prod; } else { NumericMatrix prod = no_init_matrix(ng, col); std::fill(prod.begin(), prod.end(), 1.0); for(int j = col; j--; ) { NumericMatrix::ConstColumn column = x( _ , j); NumericMatrix::Column prodj = prod( _ , j); int ngs = 0; for(int i = 0; i != l; ++i) { if(std::isnan(column[i]) || std::isnan(wg[i])) { if(!std::isnan(prodj[g[i]-1])) { prodj[g[i]-1] = column[i]+wg[i]; ++ngs; if(ngs == ng) break; } } else { prodj[g[i]-1] *= column[i]*wg[i]; } } } colnames(prod) = colnames(x); if(!Rf_isObject(x)) Rf_copyMostAttrib(x, prod); return prod; } } } } // [[Rcpp::export]] SEXP fprodlCpp(const List& x, int ng = 0, const IntegerVector& g = 0, const SEXP& w = R_NilValue, bool narm = true, bool drop = true) { int l = x.size(); if(Rf_isNull(w)) { // No weights if(ng == 0) { NumericVector prod(l); if(narm) { for(int j = l; j--; ) { NumericVector column = x[j]; int k = column.size()-1; long double prodi = column[k]; while(std::isnan(prodi) && k!=0) prodi = column[--k]; if(k != 0) for(int i = k; i--; ) { if(!std::isnan(column[i])) prodi *= column[i]; } prod[j] = (double)prodi; } } else { for(int j = l; j--; ) { NumericVector column = x[j]; long double prodi = 1; int row = column.size(); for(int i = 0; i != row; ++i) { if(std::isnan(column[i])) { prodi = column[i]; break; } else { prodi *= column[i]; } } prod[j] = (double)prodi; } } if(drop) { Rf_setAttrib(prod, R_NamesSymbol, Rf_getAttrib(x, R_NamesSymbol)); return prod; } else { List out(l); for(int j = l; j--; ) { out[j] = prod[j]; SHALLOW_DUPLICATE_ATTRIB(out[j], x[j]); } SHALLOW_DUPLICATE_ATTRIB(out, x); Rf_setAttrib(out, R_RowNamesSymbol, Rf_ScalarInteger(1)); return out; } } else { // With groups List prod(l); int gss = g.size(); if(narm) { for(int j = l; j--; ) { NumericVector column = x[j]; if(gss != column.size()) stop("length(g) must match nrow(X)"); NumericVector prodj(ng, NA_REAL); for(int i = gss; i--; ) { if(!std::isnan(column[i])) { if(std::isnan(prodj[g[i]-1])) prodj[g[i]-1] = column[i]; else prodj[g[i]-1] *= column[i]; } } SHALLOW_DUPLICATE_ATTRIB(prodj, column); prod[j] = prodj; } } else { for(int j = l; j--; ) { NumericVector column = x[j]; if(gss != column.size()) stop("length(g) must match nrow(X)"); NumericVector prodj(ng, 1.0); int ngs = 0; for(int i = 0; i != gss; ++i) { if(std::isnan(column[i])) { if(!std::isnan(prodj[g[i]-1])) { prodj[g[i]-1] = column[i]; ++ngs; if(ngs == ng) break; } } else { prodj[g[i]-1] *= column[i]; } } SHALLOW_DUPLICATE_ATTRIB(prodj, column); prod[j] = prodj; } } SHALLOW_DUPLICATE_ATTRIB(prod, x); Rf_setAttrib(prod, R_RowNamesSymbol, IntegerVector::create(NA_INTEGER, -ng)); return prod; } } else { // With weights NumericVector wg = w; int wgs = wg.size(); if (ng == 0) { NumericVector prod(l); if(narm) { for(int j = l; j--; ) { NumericVector column = x[j]; if(column.size() != wgs) stop("length(w) must match nrow(X)"); int k = wgs-1; while((std::isnan(column[k]) || std::isnan(wg[k])) && k!=0) --k; long double prodi = column[k]*wg[k]; if(k != 0) for(int i = k; i--; ) { if(std::isnan(column[i]) || std::isnan(wg[i])) continue; prodi *= column[i]*wg[i]; } prod[j] = (double)prodi; } } else { for(int j = l; j--; ) { NumericVector column = x[j]; if(column.size() != wgs) stop("length(w) must match nrow(X)"); long double prodi = 1; for(int i = 0; i != wgs; ++i) { if(std::isnan(column[i]) || std::isnan(wg[i])) { prodi = column[i]+wg[i]; break; } else { prodi *= column[i]*wg[i]; } } prod[j] = (double)prodi; } } if(drop) { Rf_setAttrib(prod, R_NamesSymbol, Rf_getAttrib(x, R_NamesSymbol)); return prod; } else { List out(l); for(int j = l; j--; ) { out[j] = prod[j]; SHALLOW_DUPLICATE_ATTRIB(out[j], x[j]); } SHALLOW_DUPLICATE_ATTRIB(out, x); Rf_setAttrib(out, R_RowNamesSymbol, Rf_ScalarInteger(1)); return out; } } else { // With groups List prod(l); int gss = g.size(); if(wgs != gss) stop("length(w) must match length(g)"); if(narm) { for(int j = l; j--; ) { NumericVector column = x[j]; if(gss != column.size()) stop("length(g) must match nrow(X)"); NumericVector prodj(ng, NA_REAL); for(int i = gss; i--; ) { if(std::isnan(column[i]) || std::isnan(wg[i])) continue; if(std::isnan(prodj[g[i]-1])) { prodj[g[i]-1] = column[i]*wg[i]; } else { prodj[g[i]-1] *= column[i]*wg[i]; } } SHALLOW_DUPLICATE_ATTRIB(prodj, column); prod[j] = prodj; } } else { for(int j = l; j--; ) { NumericVector column = x[j]; if(gss != column.size()) stop("length(g) must match nrow(X)"); NumericVector prodj(ng, 1.0); int ngs = 0; for(int i = 0; i != gss; ++i) { if(std::isnan(column[i]) || std::isnan(wg[i])) { if(!std::isnan(prodj[g[i]-1])) { prodj[g[i]-1] = column[i]+wg[i]; ++ngs; if(ngs == ng) break; } } else { prodj[g[i]-1] *= column[i]*wg[i]; } } SHALLOW_DUPLICATE_ATTRIB(prodj, column); prod[j] = prodj; } } SHALLOW_DUPLICATE_ATTRIB(prod, x); Rf_setAttrib(prod, R_RowNamesSymbol, IntegerVector::create(NA_INTEGER, -ng)); return prod; } } } collapse/src/flag.cpp0000644000176200001440000012711414174223734014275 0ustar liggesusers// [[Rcpp::plugins(cpp11)]] #include using namespace Rcpp; // 7th version: Irregular time series and panels supported ! template Vector flagleadCppImpl(const Vector& x, const IntegerVector& n, const SEXP& fill, int ng, const IntegerVector& g, const SEXP& t, bool names) { // typedef typename Rcpp::traits::storage_type::type storage_t; // storage_t fil; Vector fil(1); if(Rf_isNull(fill)) { // fill != fill // Not necessary !! fil = Vector::get_na(); } else { fil = as >(fill); //as(fill); -> doesn't work for Character vector fill !! } auto ff = fil[0]; int l = x.size(), ns = n.size(), prev = INT_MAX; IntegerVector absn = no_init_vector(ns); for(int i = 0; i != ns; ++i) { if(n[i] == prev) stop("duplicated values in n detected"); // because one might mistakenly pass a factor to the n-slot !! prev = n[i]; if(prev < 0) { if(prev == NA_INTEGER) stop("NA in n"); absn[i] = -prev; } else absn[i] = prev; } if(ns == 1) names = false; CharacterVector nc = names ? Rf_coerceVector(absn, STRSXP) : NA_STRING; // NumericVector(abs(n)) CharacterVector colnam = names ? no_init_vector(ns) : no_init_vector(1); Matrix out = no_init_matrix(l, ns); if(ng == 0) { // No groups if(Rf_isNull(t)) { // Ordered data for(int p = ns; p--; ) { int np = n[p]; if(absn[p] > l) stop("lag-length exceeds length of vector"); MatrixColumn outp = out( _ , p); if(np>0) { if(names) colnam[p] = "L" + nc[p]; int i = 0; while(i != np) outp[i++] = ff; for( ; i != l; ++i) outp[i] = x[i - np]; } else if(np<0) { if(names) colnam[p] = "F" + nc[p]; int i = l, st = l+np; while(i != st) outp[--i] = ff; for( ; i--; ) outp[i] = x[i - np]; } else { if(names) colnam[p] = "--"; outp = x; } } } else { // Unordered data: Timevar provided IntegerVector ord = t; if(l != ord.size()) stop("length(x) must match length(t)"); int min = INT_MAX, max = INT_MIN, osize, temp; for(int i = 0; i != l; ++i) { if(ord[i] < min) min = ord[i]; if(ord[i] > max) max = ord[i]; } if(min == NA_INTEGER) stop("Timevar contains missing values"); osize = max-min+1; if(osize > 10000000 && osize > 3 * l) warning("Your time series is very irregular. Need to create an internal ordering vector of length %s to represent it.", osize); IntegerVector omap(osize), ord2 = no_init_vector(l); for(int i = 0; i != l; ++i) { temp = ord[i] - min; // Best ? Or direct assign to ord2[i] ? Also check for panel version.. if(omap[temp]) stop("Repeated values in timevar"); omap[temp] = i+1; ord2[i] = temp; } // return as >(omap); for(int p = ns; p--; ) { int np = n[p]; if(absn[p] > l) stop("lag-length exceeds length of vector"); MatrixColumn outp = out( _ , p); if(np>0) { if(names) colnam[p] = "L" + nc[p]; for(int i = 0; i != l; ++i) { // Smarter solution using while ??? if(ord2[i] >= np && (temp = omap[ord2[i] - np])) { outp[i] = x[temp-1]; } else { outp[i] = ff; } } } else if(np<0) { if(names) colnam[p] = "F" + nc[p]; for(int i = 0, osnp = osize+np; i != l; ++i) { // Smarter solution using while ??? if(ord2[i] < osnp && (temp = omap[ord2[i] - np])) { outp[i] = x[temp-1]; } else { outp[i] = ff; } } } else { if(names) colnam[p] = "--"; outp = x; } } } } else { // With groups if(l != g.size()) stop("length(x) must match length(g)"); int ags = l/ng, ngp = ng+1; if(Rf_isNull(t)) { // Ordered data // int seen[ngp], memsize = sizeof(int)*ngp; for(int p = ns; p--; ) { int np = n[p]; if(absn[p] > ags) warning("lag-length exceeds average group-size (%i). This could also be a result of unused factor levels. See #25. Use fdroplevels() to remove unused factor levels from your data.", ags); MatrixColumn outp = out( _ , p); if(np>0) { if(names) colnam[p] = "L" + nc[p]; std::vector seen(ngp); // memset(seen, 0, memsize); for(int i = 0; i != l; ++i) { if(seen[g[i]] == np) { outp[i] = x[i-np]; } else { outp[i] = ff; ++seen[g[i]]; } } } else if(np<0) { std::vector seen(ngp); // memset(seen, 0, memsize); if(names) colnam[p] = "F" + nc[p]; for(int i = l; i--; ) { // good?? if(seen[g[i]] == np) { outp[i] = x[i-np]; } else { outp[i] = ff; --seen[g[i]]; } } } else { if(names) colnam[p] = "--"; outp = x; } } } else { // Unordered data: Timevar provided IntegerVector ord = t; int temp; if(l != ord.size()) stop("length(x) must match length(t)"); IntegerVector min(ngp, INT_MAX), max(ngp, INT_MIN), cgs = no_init_vector(ngp); for(int i = 0; i != l; ++i) { temp = g[i]; if(ord[i] < min[temp]) min[temp] = ord[i]; if(ord[i] > max[temp]) max[temp] = ord[i]; } temp = 0; for(int i = 1; i != ngp; ++i) { if(min[i] == NA_INTEGER) stop("Timevar contains missing values"); if(min[i] == INT_MAX) continue; // Needed in case of unused factor levels (group vector too large) cgs[i] = temp; // This needs to b here (for unused factor levels case...) max[i] -= min[i] - 1; // need max[i] which stores the complete group sizes only if p<0 e.g. if computing leads.. temp += max[i]; } // omap provides the ordering to order the vector (needed to find previous / next values) if(temp > 10000000 && temp > 3 * l) warning("Your panel is very irregular. Need to create an internal ordering vector of length %s to represent it.", temp); IntegerVector omap(temp), ord2 = no_init_vector(l); for(int i = 0; i != l; ++i) { ord2[i] = ord[i] - min[g[i]]; temp = cgs[g[i]] + ord2[i]; if(omap[temp]) stop("Repeated values of timevar within one or more groups"); omap[temp] = i+1; // needed to add 1 to distinguish between 0 and gap } for(int p = ns; p--; ) { int np = n[p]; if(absn[p] > ags) warning("lag-length exceeds average group-size (%i). This could also be a result of unused factor levels. See #25. Use fdroplevels() to remove unused factor levels from your data.", ags); MatrixColumn outp = out( _ , p); if(np>0) { if(names) colnam[p] = "L" + nc[p]; for(int i = 0; i != l; ++i) { if(ord2[i] >= np && (temp = omap[cgs[g[i]]+ord2[i]-np])) { outp[i] = x[temp-1]; } else { outp[i] = ff; } } } else if(np<0) { if(names) colnam[p] = "F" + nc[p]; for(int i = 0; i != l; ++i) { if(ord2[i] < max[g[i]]+np && (temp = omap[cgs[g[i]]+ord2[i]-np])) { outp[i] = x[temp-1]; } else { outp[i] = ff; } } } else { if(names) colnam[p] = "--"; outp = x; } } } } SHALLOW_DUPLICATE_ATTRIB(out, x); if(ns != 1) { Rf_setAttrib(out, R_NamesSymbol, R_NilValue); Rf_dimgets(out, Dimension(l, ns)); if(Rf_isObject(x)) { // && !x.inherits("pseries") -> lag matrix in plm is not a pseries anymore anyway... CharacterVector classes = Rf_getAttrib(out, R_ClassSymbol); classes.push_back("matrix"); // classes.push_back("array"); // mts does not have class array... Rf_classgets(out, classes); } // else { // Rf_classgets(out, Rf_mkString("matrix")); // } if(names) Rf_dimnamesgets(out, List::create(Rf_getAttrib(x, R_NamesSymbol), colnam)); // out.attr("class") = CharacterVector::create(x.attr("class"),"matrix"); } return out; } template <> Vector flagleadCppImpl(const Vector& x, const IntegerVector& n, const SEXP& fill, int ng, const IntegerVector& g, const SEXP& t, bool names) { stop("Not supported SEXP type!"); } template <> Vector flagleadCppImpl(const Vector& x, const IntegerVector& n, const SEXP& fill, int ng, const IntegerVector& g, const SEXP& t, bool names) { stop("Not supported SEXP type!"); } template <> Vector flagleadCppImpl(const Vector& x, const IntegerVector& n, const SEXP& fill, int ng, const IntegerVector& g, const SEXP& t, bool names) { stop("Not supported SEXP type!"); } template <> Vector flagleadCppImpl(const Vector& x, const IntegerVector& n, const SEXP& fill, int ng, const IntegerVector& g, const SEXP& t, bool names) { stop("Not supported SEXP type!"); } // [[Rcpp::export]] SEXP flagleadCpp(SEXP x, IntegerVector n = 1, SEXP fill = R_NilValue, int ng = 0, IntegerVector g = 0, SEXP t = R_NilValue, bool names = true){ RCPP_RETURN_VECTOR(flagleadCppImpl, x, n, fill, ng, g, t, names); } inline SEXP coln_check(SEXP x) { if(Rf_isNull(x)) return NA_STRING; else return x; // Rf_coerceVector(x, STRSXP); } template Matrix flagleadmCppImpl(const Matrix& x, const IntegerVector& n, const SEXP& fill, int ng, const IntegerVector& g, const SEXP& t, bool names) { Vector fil(1); if(Rf_isNull(fill)) { // || fill != fill not necessary !! fil = Vector::get_na(); } else { fil = as >(fill); } auto ff = fil[0]; int l = x.nrow(), col = x.ncol(), ns = n.size(), pos = INT_MAX; IntegerVector absn = no_init_vector(ns); for(int i = 0; i != ns; ++i) { if(n[i] == pos) stop("duplicated values in n detected"); // because one might mistakenly pass a factor to the n-slot !! pos = n[i]; if(pos < 0) { if(pos == NA_INTEGER) stop("NA in n"); absn[i] = -pos; } else absn[i] = pos; } pos = 0; CharacterVector nc = names ? Rf_coerceVector(absn, STRSXP) : NA_STRING; // NumericVector(abs(n)) CharacterVector colnam = names ? no_init_vector(col*ns) : no_init_vector(1); // what if no names ?? CharacterVector coln = names ? coln_check(colnames(x)) : NA_STRING; if(names && coln[0] == NA_STRING) names = false; Matrix out = no_init_matrix(l, col*ns); if(ng == 0) { // No groups if(Rf_isNull(t)) { // Ordered data for(int j = 0; j != col; ++j) { ConstMatrixColumn column = x( _ , j); for(int p = 0; p != ns; ++p) { int np = n[p]; if(absn[p] > l) stop("lag-length exceeds length of vector"); MatrixColumn outj = out( _ , pos); if(np>0) { if(names) colnam[pos] = "L" + nc[p] + "." + coln[j]; int i = 0; while(i != np) outj[i++] = ff; for( ; i != l; ++i) outj[i] = column[i - np]; } else if(np<0) { if(names) colnam[pos] = "F" + nc[p] + "." + coln[j]; int i = l, st = l+np; while(i != st) outj[--i] = ff; for( ; i--; ) outj[i] = column[i - np]; } else { if(names) colnam[pos] = coln[j]; outj = column; } ++pos; } } } else { // Unordered data: Timevar Provided IntegerVector ord = t; if(l != ord.size()) stop("length(x) must match length(t)"); int min = INT_MAX, max = INT_MIN, osize, temp; for(int i = 0; i != l; ++i) { if(ord[i] < min) min = ord[i]; if(ord[i] > max) max = ord[i]; } if(min == NA_INTEGER) stop("Timevar contains missing values"); osize = max-min+1; if(osize > 10000000 && osize > 3 * l) warning("Your time series is very irregular. Need to create an internal ordering vector of length %s to represent it.", osize); IntegerVector omap(osize), ord2 = no_init_vector(l); for(int i = 0; i != l; ++i) { temp = ord[i] - min; // Best ? Or direct assign to ord2[i] ? Also check for panel version.. if(omap[temp]) stop("Repeated values in timevar"); omap[temp] = i+1; ord2[i] = temp; } for(int j = 0; j != col; ++j) { ConstMatrixColumn column = x( _ , j); for(int p = 0; p != ns; ++p) { int np = n[p]; if(absn[p] > l) stop("lag-length exceeds length of vector"); MatrixColumn outj = out( _ , pos); if(np>0) { if(names) colnam[pos] = "L" + nc[p] + "." + coln[j]; for(int i = 0; i != l; ++i) { // Smarter solution using while ??? if(ord2[i] >= np && (temp = omap[ord2[i] - np])) { outj[i] = column[temp-1]; } else { outj[i] = ff; } } } else if(np<0) { if(names) colnam[pos] = "F" + nc[p] + "." + coln[j]; for(int i = 0, osnp = osize+np; i != l; ++i) { // Smarter solution using while ??? if(ord2[i] < osnp && (temp = omap[ord2[i] - np])) { outj[i] = column[temp-1]; } else { outj[i] = ff; } } } else { if(names) colnam[pos] = coln[j]; outj = column; } ++pos; } } } } else { // With groups if(l != g.size()) stop("length(x) must match length(g)"); int ags = l/ng, ngp = ng+1; if(Rf_isNull(t)) { // Ordered data // int seen[ngp], memsize = sizeof(int)*ngp; for(int j = 0; j != col; ++j) { ConstMatrixColumn column = x( _ , j); for(int p = 0; p != ns; ++p) { int np = n[p]; if(absn[p] > ags) warning("lag-length exceeds average group-size (%i). This could also be a result of unused factor levels. See #25. Use fdroplevels() to remove unused factor levels from your data.", ags); MatrixColumn outj = out( _ , pos); if(np>0) { if(names) colnam[pos] = "L" + nc[p] + "." + coln[j]; std::vector seen(ngp); // memset(seen, 0, memsize); for(int i = 0; i != l; ++i) { if(seen[g[i]] == np) { outj[i] = column[i-np]; } else { outj[i] = ff; ++seen[g[i]]; } } } else if(np<0) { if(names) colnam[pos] = "F" + nc[p] + "." + coln[j]; std::vector seen(ngp); // memset(seen, 0, memsize); for(int i = l; i--; ) { // good?? if(seen[g[i]] == np) { outj[i] = column[i-np]; } else { outj[i] = ff; --seen[g[i]]; } } } else { if(names) colnam[pos] = coln[j]; outj = column; } ++pos; } } } else { // Unordered data: Timevar provided IntegerVector ord = t; int temp; if(l != ord.size()) stop("length(x) must match length(t)"); IntegerVector min(ngp, INT_MAX), max(ngp, INT_MIN), cgs = no_init_vector(ngp); for(int i = 0; i != l; ++i) { temp = g[i]; if(ord[i] < min[temp]) min[temp] = ord[i]; if(ord[i] > max[temp]) max[temp] = ord[i]; } temp = 0; for(int i = 1; i != ngp; ++i) { if(min[i] == NA_INTEGER) stop("Timevar contains missing values"); if(min[i] == INT_MAX) continue; // Needed in case of unused factor levels (group vector too large) cgs[i] = temp; // This needs to b here (for unused factor levels case...) max[i] -= min[i] - 1; // need max[i] which stores the complete group sizes only if p<0 e.g. if computing leads.. temp += max[i]; } // omap provides the ordering to order the vector (needed to find previous / next values) if(temp > 10000000 && temp > 3 * l) warning("Your panel is very irregular. Need to create an internal ordering vector of length %s to represent it.", temp); IntegerVector omap(temp), ord2 = no_init_vector(l), index = no_init_vector(l); for(int i = 0; i != l; ++i) { ord2[i] = ord[i] - min[g[i]]; index[i] = cgs[g[i]] + ord2[i]; if(omap[index[i]]) stop("Repeated values of timevar within one or more groups"); omap[index[i]] = i+1; // needed to add 1 to distinguish between 0 and gap } for(int j = 0; j != col; ++j) { ConstMatrixColumn column = x( _ , j); for(int p = 0; p != ns; ++p) { int np = n[p]; if(absn[p] > ags) warning("lag-length exceeds average group-size (%i). This could also be a result of unused factor levels. See #25. Use fdroplevels() to remove unused factor levels from your data.", ags); MatrixColumn outj = out( _ , pos); if(np>0) { if(names) colnam[pos] = "L" + nc[p] + "." + coln[j]; for(int i = 0; i != l; ++i) { if(ord2[i] >= np && (temp = omap[index[i]-np])) { outj[i] = column[temp-1]; } else { outj[i] = ff; } } } else if(np<0) { if(names) colnam[pos] = "F" + nc[p] + "." + coln[j]; for(int i = 0; i != l; ++i) { // best loop ?? if(ord2[i] < max[g[i]]+np && (temp = omap[index[i]-np])) { outj[i] = column[temp-1]; } else { outj[i] = ff; } } } else { if(names) colnam[pos] = coln[j]; outj = column; } ++pos; } } } } SHALLOW_DUPLICATE_ATTRIB(out, x); if(ns != 1) Rf_dimgets(out, Dimension(l, col*ns)); if(names) { Rf_dimnamesgets(out, List::create(rownames(x), colnam)); // colnames(out) = colnam deletes row names ! } else if(ns != 1) { Rf_setAttrib(out, R_DimNamesSymbol, R_NilValue); } return out; } template <> Matrix flagleadmCppImpl(const Matrix& x, const IntegerVector& n, const SEXP& fill, int ng, const IntegerVector& g, const SEXP& t, bool names) { stop("Not supported SEXP type!"); } template <> Matrix flagleadmCppImpl(const Matrix& x, const IntegerVector& n, const SEXP& fill, int ng, const IntegerVector& g, const SEXP& t, bool names) { stop("Not supported SEXP type!"); } template <> Matrix flagleadmCppImpl(const Matrix& x, const IntegerVector& n, const SEXP& fill, int ng, const IntegerVector& g, const SEXP& t, bool names) { stop("Not supported SEXP type!"); } template <> Matrix flagleadmCppImpl(const Matrix& x, const IntegerVector& n, const SEXP& fill, int ng, const IntegerVector& g, const SEXP& t, bool names) { stop("Not supported SEXP type!"); } // [[Rcpp::export]] SEXP flagleadmCpp(SEXP x, IntegerVector n = 1, SEXP fill = R_NilValue, int ng = 0, IntegerVector g = 0, SEXP t = R_NilValue, bool names = true){ RCPP_RETURN_MATRIX(flagleadmCppImpl, x, n, fill, ng, g, t, names); } // [[Rcpp::export]] List flagleadlCpp(const List& x, const IntegerVector& n = 1, const SEXP& fill = R_NilValue, int ng = 0, const IntegerVector& g = 0, const SEXP& t = R_NilValue, bool names = true) { bool lfill = Rf_isNull(fill); if(!lfill && TYPEOF(fill) == LGLSXP) lfill = Rf_asLogical(fill) == NA_LOGICAL; int l = x.size(), ns = n.size(), pos = INT_MAX; List out(l * ns); IntegerVector absn = no_init_vector(ns); for(int i = 0; i != ns; ++i) { if(n[i] == pos) stop("duplicated values in n detected"); // because one might mistakenly pass a factor to the n-slot !! pos = n[i]; if(pos < 0) { if(pos == NA_INTEGER) stop("NA in n"); absn[i] = -pos; } else absn[i] = pos; } pos = 0; CharacterVector nc = names ? Rf_coerceVector(absn, STRSXP) : NA_STRING; // NumericVector(abs(n)) CharacterVector nam = names ? no_init_vector(l*ns) : no_init_vector(1); // what if no names ?? CharacterVector na = names ? coln_check(Rf_getAttrib(x, R_NamesSymbol)) : NA_STRING; if(names && na[0] == NA_STRING) names = false; if(ng == 0) { // No groups if(Rf_isNull(t)) { // Ordered data for(int j = 0; j != l; ++j) { int txj = TYPEOF(x[j]); switch(txj) { case REALSXP: { NumericVector column = x[j]; int row = column.size(); double ff = lfill ? NA_REAL : Rf_asReal(fill); // as() for(int p = 0; p != ns; ++p) { int np = n[p]; if(absn[p] > row) stop("lag-length exceeds length of vector"); if(np>0) { NumericVector outjp = no_init_vector(row); if(names) nam[pos] = "L" + nc[p] + "." + na[j]; int i = 0; while(i != np) outjp[i++] = ff; for( ; i != row; ++i) outjp[i] = column[i - np]; SHALLOW_DUPLICATE_ATTRIB(outjp, column); out[pos] = outjp; } else if(np<0) { NumericVector outjp = no_init_vector(row); if(names) nam[pos] = "F" + nc[p] + "." + na[j]; int i = row, st = row+np; while(i != st) outjp[--i] = ff; for( ; i--; ) outjp[i] = column[i - np]; SHALLOW_DUPLICATE_ATTRIB(outjp, column); out[pos] = outjp; } else { if(names) nam[pos] = na[j]; out[pos] = column; } ++pos; } break; } case LGLSXP: case INTSXP: { IntegerVector column = x[j]; int row = column.size(); int ff = lfill ? NA_INTEGER : Rf_asInteger(fill); // as() for(int p = 0; p != ns; ++p) { int np = n[p]; if(absn[p] > row) stop("lag-length exceeds length of vector"); if(np>0) { IntegerVector outjp = no_init_vector(row); if(names) nam[pos] = "L" + nc[p] + "." + na[j]; int i = 0; while(i != np) outjp[i++] = ff; for( ; i != row; ++i) outjp[i] = column[i - np]; SHALLOW_DUPLICATE_ATTRIB(outjp, column); if(txj == LGLSXP) SET_TYPEOF(outjp, LGLSXP); out[pos] = outjp; } else if(np<0) { IntegerVector outjp = no_init_vector(row); if(names) nam[pos] = "F" + nc[p] + "." + na[j]; int i = row, st = row+np; while(i != st) outjp[--i] = ff; for( ; i--; ) outjp[i] = column[i - np]; SHALLOW_DUPLICATE_ATTRIB(outjp, column); if(txj == LGLSXP) SET_TYPEOF(outjp, LGLSXP); out[pos] = outjp; } else { if(names) nam[pos] = na[j]; out[pos] = x[j]; } ++pos; } break; } case STRSXP: { CharacterVector column = x[j]; int row = column.size(); // String ff = lfill ? NA_STRING : as(fill); // String SEXP ff = lfill ? NA_STRING : Rf_asChar(fill); for(int p = 0; p != ns; ++p) { int np = n[p]; if(absn[p] > row) stop("lag-length exceeds length of vector"); if(np>0) { CharacterVector outjp = no_init_vector(row); if(names) nam[pos] = "L" + nc[p] + "." + na[j]; int i = 0; while(i != np) outjp[i++] = ff; for( ; i != row; ++i) outjp[i] = column[i - np]; SHALLOW_DUPLICATE_ATTRIB(outjp, column); out[pos] = outjp; } else if(np<0) { CharacterVector outjp = no_init_vector(row); if(names) nam[pos] = "F" + nc[p] + "." + na[j]; int i = row, st = row+np; while(i != st) outjp[--i] = ff; for( ; i--; ) outjp[i] = column[i - np]; SHALLOW_DUPLICATE_ATTRIB(outjp, column); out[pos] = outjp; } else { if(names) nam[pos] = na[j]; out[pos] = column; } ++pos; } break; } default: stop("Not supported SEXP type!"); } } } else { // Unordered data: Timevar Provided IntegerVector ord = t; int min = INT_MAX, max = INT_MIN, osize, temp, os = ord.size(); if(Rf_length(x[0]) != os) stop("nrow(x) must match length(t)"); for(int i = 0; i != os; ++i) { if(ord[i] < min) min = ord[i]; if(ord[i] > max) max = ord[i]; } if(min == NA_INTEGER) stop("Timevar contains missing values"); osize = max-min+1; if(osize > 10000000 && osize > 3 * os) warning("Your time series is very irregular. Need to create an internal ordering vector of length %s to represent it.", osize); IntegerVector omap(osize), ord2 = no_init_vector(os); for(int i = 0; i != os; ++i) { temp = ord[i] - min; // Best ? Or direct assign to ord2[i] ? Also check for panel version.. if(omap[temp]) stop("Repeated values in timevar"); omap[temp] = i+1; ord2[i] = temp; } for(int j = 0; j != l; ++j) { int txj = TYPEOF(x[j]); switch(txj) { case REALSXP: { NumericVector column = x[j]; if(os != column.size()) stop("nrow(x) must match length(t)"); double ff = lfill ? NA_REAL : Rf_asReal(fill); // as( for(int p = 0; p != ns; ++p) { int np = n[p]; if(absn[p] > os) stop("lag-length exceeds length of vector"); if(np>0) { NumericVector outjp = no_init_vector(os); if(names) nam[pos] = "L" + nc[p] + "." + na[j]; for(int i = 0; i != os; ++i) { // Smarter solution using while ??? if(ord2[i] >= np && (temp = omap[ord2[i] - np])) { outjp[i] = column[temp-1]; } else { outjp[i] = ff; } } SHALLOW_DUPLICATE_ATTRIB(outjp, column); out[pos] = outjp; } else if(np<0) { NumericVector outjp = no_init_vector(os); if(names) nam[pos] = "F" + nc[p] + "." + na[j]; for(int i = 0, osnp = osize+np; i != os; ++i) { // Smarter solution using while ??? if(ord2[i] < osnp && (temp = omap[ord2[i] - np])) { outjp[i] = column[temp-1]; } else { outjp[i] = ff; } } SHALLOW_DUPLICATE_ATTRIB(outjp, column); out[pos] = outjp; } else { if(names) nam[pos] = na[j]; out[pos] = column; } ++pos; } break; } case LGLSXP: case INTSXP: { IntegerVector column = x[j]; if(os != column.size()) stop("length(x) must match length(t)"); int ff = lfill ? NA_INTEGER : Rf_asInteger(fill); // as( for(int p = 0; p != ns; ++p) { int np = n[p]; if(absn[p] > os) stop("lag-length exceeds length of vector"); if(np>0) { IntegerVector outjp = no_init_vector(os); if(names) nam[pos] = "L" + nc[p] + "." + na[j]; for(int i = 0; i != os; ++i) { // Smarter solution using while ??? if(ord2[i] >= np && (temp = omap[ord2[i] - np])) { outjp[i] = column[temp-1]; } else { outjp[i] = ff; } } SHALLOW_DUPLICATE_ATTRIB(outjp, column); if(txj == LGLSXP) SET_TYPEOF(outjp, LGLSXP); out[pos] = outjp; } else if(np<0) { IntegerVector outjp = no_init_vector(os); if(names) nam[pos] = "F" + nc[p] + "." + na[j]; for(int i = 0, osnp = osize+np; i != os; ++i) { // Smarter solution using while ??? if(ord2[i] < osnp && (temp = omap[ord2[i] - np])) { outjp[i] = column[temp-1]; } else { outjp[i] = ff; } } SHALLOW_DUPLICATE_ATTRIB(outjp, column); if(txj == LGLSXP) SET_TYPEOF(outjp, LGLSXP); out[pos] = outjp; } else { if(names) nam[pos] = na[j]; out[pos] = x[j]; } ++pos; } break; } case STRSXP: { CharacterVector column = x[j]; if(os != column.size()) stop("length(x) must match length(t)"); // String ff = lfill ? NA_STRING : as(fill); // String ?? SEXP ff = lfill ? NA_STRING : Rf_asChar(fill); for(int p = 0; p != ns; ++p) { int np = n[p]; if(absn[p] > os) stop("lag-length exceeds length of vector"); if(np>0) { CharacterVector outjp = no_init_vector(os); if(names) nam[pos] = "L" + nc[p] + "." + na[j]; for(int i = 0; i != os; ++i) { // Smarter solution using while ??? if(ord2[i] >= np && (temp = omap[ord2[i] - np])) { outjp[i] = column[temp-1]; } else { outjp[i] = ff; } } SHALLOW_DUPLICATE_ATTRIB(outjp, column); out[pos] = outjp; } else if(np<0) { CharacterVector outjp = no_init_vector(os); if(names) nam[pos] = "F" + nc[p] + "." + na[j]; for(int i = 0, osnp = osize+np; i != os; ++i) { // Smarter solution using while ??? if(ord2[i] < osnp && (temp = omap[ord2[i] - np])) { outjp[i] = column[temp-1]; } else { outjp[i] = ff; } } SHALLOW_DUPLICATE_ATTRIB(outjp, column); out[pos] = outjp; } else { if(names) nam[pos] = na[j]; out[pos] = column; } ++pos; } break; } default: stop("Not supported SEXP type!"); } } } } else { // With groups int gss = g.size(), ags = gss/ng, ngp = ng+1, temp = 0; if(Rf_isNull(t)) { // Ordered data std::vector seen(ngp); // int seen[ngp], memsize = sizeof(int)*ngp; for(int j = 0; j != l; ++j) { int txj = TYPEOF(x[j]); switch(txj) { case REALSXP: { NumericVector column = x[j]; double ff = lfill ? NA_REAL : Rf_asReal(fill); // as() if(gss != column.size()) stop("length(x) must match length(g)"); for(int p = 0; p != ns; ++p) { int np = n[p]; if(absn[p] > ags) warning("lag-length exceeds average group-size (%i). This could also be a result of unused factor levels. See #25. Use fdroplevels() to remove unused factor levels from your data.", ags); if(np>0) { NumericVector outjp = no_init_vector(gss); if(names) nam[pos] = "L" + nc[p] + "." + na[j]; seen.assign(ngp, 0); // std::vector seen(ngp); // memset(seen, 0, memsize); for(int i = 0; i != gss; ++i) { if(seen[g[i]] == np) { outjp[i] = column[i-np]; } else { outjp[i] = ff; ++seen[g[i]]; } } SHALLOW_DUPLICATE_ATTRIB(outjp, column); out[pos] = outjp; } else if(np<0) { NumericVector outjp = no_init_vector(gss); if(names) nam[pos] = "F" + nc[p] + "." + na[j]; seen.assign(ngp, 0); //std::vector seen(ngp); // memset(seen, 0, memsize); for(int i = gss; i--; ) { // good?? if(seen[g[i]] == np) { outjp[i] = column[i-np]; } else { outjp[i] = ff; --seen[g[i]]; } } SHALLOW_DUPLICATE_ATTRIB(outjp, column); out[pos] = outjp; } else { if(names) nam[pos] = na[j]; out[pos] = column; } ++pos; } break; } case LGLSXP: case INTSXP: { IntegerVector column = x[j]; int ff = lfill ? NA_INTEGER : Rf_asInteger(fill); // as() if(gss != column.size()) stop("length(x) must match length(g)"); for(int p = 0; p != ns; ++p) { int np = n[p]; if(absn[p] > ags) warning("lag-length exceeds average group-size (%i). This could also be a result of unused factor levels. See #25. Use fdroplevels() to remove unused factor levels from your data.", ags); if(np>0) { IntegerVector outjp = no_init_vector(gss); if(names) nam[pos] = "L" + nc[p] + "." + na[j]; seen.assign(ngp, 0); //std::vector seen(ngp); // memset(seen, 0, memsize); for(int i = 0; i != gss; ++i) { if(seen[g[i]] == np) { outjp[i] = column[i-np]; } else { outjp[i] = ff; ++seen[g[i]]; } } SHALLOW_DUPLICATE_ATTRIB(outjp, column); if(txj == LGLSXP) SET_TYPEOF(outjp, LGLSXP); out[pos] = outjp; } else if(np<0) { IntegerVector outjp = no_init_vector(gss); if(names) nam[pos] = "F" + nc[p] + "." + na[j]; seen.assign(ngp, 0); //std::vector seen(ngp); // memset(seen, 0, memsize); for(int i = gss; i--; ) { // good?? if(seen[g[i]] == np) { outjp[i] = column[i-np]; } else { outjp[i] = ff; --seen[g[i]]; } } SHALLOW_DUPLICATE_ATTRIB(outjp, column); if(txj == LGLSXP) SET_TYPEOF(outjp, LGLSXP); out[pos] = outjp; } else { if(names) nam[pos] = na[j]; out[pos] = x[j]; } ++pos; } break; } case STRSXP: { CharacterVector column = x[j]; // String ff = lfill ? NA_STRING : as(fill); // String ?? SEXP ff = lfill ? NA_STRING : Rf_asChar(fill); if(gss != column.size()) stop("length(x) must match length(g)"); for(int p = 0; p != ns; ++p) { int np = n[p]; if(absn[p] > ags) warning("lag-length exceeds average group-size (%i). This could also be a result of unused factor levels. See #25. Use fdroplevels() to remove unused factor levels from your data.", ags); if(np>0) { CharacterVector outjp = no_init_vector(gss); if(names) nam[pos] = "L" + nc[p] + "." + na[j]; seen.assign(ngp, 0); //std::vector seen(ngp); // memset(seen, 0, memsize); for(int i = 0; i != gss; ++i) { if(seen[g[i]] == np) { outjp[i] = column[i-np]; } else { outjp[i] = ff; ++seen[g[i]]; } } SHALLOW_DUPLICATE_ATTRIB(outjp, column); out[pos] = outjp; } else if(np<0) { CharacterVector outjp = no_init_vector(gss); if(names) nam[pos] = "F" + nc[p] + "." + na[j]; seen.assign(ngp, 0); //std::vector seen(ngp); // memset(seen, 0, memsize); for(int i = gss; i--; ) { // good?? if(seen[g[i]] == np) { outjp[i] = column[i-np]; } else { outjp[i] = ff; --seen[g[i]]; } } SHALLOW_DUPLICATE_ATTRIB(outjp, column); out[pos] = outjp; } else { if(names) nam[pos] = na[j]; out[pos] = column; } ++pos; } break; } default: stop("Not supported SEXP type!"); } } } else { // Unordered data: Timevar provided IntegerVector ord = t; if(gss != ord.size()) stop("length(g) must match length(t)"); IntegerVector min(ngp, INT_MAX), max(ngp, INT_MIN), cgs = no_init_vector(ngp); // return List::create(min, max); // Note: INT_MIN is the same as NA_INTEGER for(int i = 0; i != gss; ++i) { temp = g[i]; if(ord[i] < min[temp]) min[temp] = ord[i]; if(ord[i] > max[temp]) max[temp] = ord[i]; } temp = 0; for(int i = 1; i != ngp; ++i) { if(min[i] == NA_INTEGER) stop("Timevar contains missing values"); if(min[i] == INT_MAX) continue; // Needed in case of unused factor levels (group vector too large) cgs[i] = temp; // This needs to b here (for unused factor levels case...) max[i] -= min[i] - 1; // need max[i] which stores the complete group sizes only if p<0 e.g. if computing leads.. temp += max[i]; // + max[i] - min[i] + 1; } // if(min[ng] == NA_INTEGER) stop("Timevar contains missing values"); // if(min[ng] != INT_MAX) { // max[ng] -= min[ng] - 1; // temp += max[ng]; // } // return List::create(cgs, min, max); // index stores the position of the current observation in the ordered vector // omap provides the ordering to order the vector (needed to find previous / next values) if(temp > 10000000 && temp > 3 * gss) warning("Your panel is very irregular. Need to create an internal ordering vector of length %s to represent it.", temp); IntegerVector omap(temp), ord2 = no_init_vector(gss), index = no_init_vector(gss); for(int i = 0; i != gss; ++i) { ord2[i] = ord[i] - min[g[i]]; // Need ord2 can get rid of any part ?? ?? // if(ord2[i] >= gsv[g[i]-1]) stop("Gaps in timevar within one or more groups"); index[i] = cgs[g[i]] + ord2[i]; if(omap[index[i]]) stop("Repeated values of timevar within one or more groups"); omap[index[i]] = i+1; // needed to add 1 to distinguish between 0 and gap } // return List::create(cgs, min, max, ord2, index, omap); for(int j = 0; j != l; ++j) { int txj = TYPEOF(x[j]); switch(txj) { case REALSXP: { NumericVector column = x[j]; double ff = lfill ? NA_REAL : Rf_asReal(fill); // as() if(gss != column.size()) stop("length(x) must match length(g)"); for(int p = 0; p != ns; ++p) { int np = n[p]; if(absn[p] > ags) warning("lag-length exceeds average group-size (%i). This could also be a result of unused factor levels. See #25. Use fdroplevels() to remove unused factor levels from your data.", ags); if(np>0) { NumericVector outjp = no_init_vector(gss); if(names) nam[pos] = "L" + nc[p] + "." + na[j]; for(int i = 0; i != gss; ++i) { if(ord2[i] >= np && (temp = omap[index[i]-np])) { outjp[i] = column[temp-1]; } else { outjp[i] = ff; } } SHALLOW_DUPLICATE_ATTRIB(outjp, column); out[pos] = outjp; } else if(np<0) { NumericVector outjp = no_init_vector(gss); if(names) nam[pos] = "F" + nc[p] + "." + na[j]; for(int i = 0; i != gss; ++i) { // best loop ?? if(ord2[i] < max[g[i]]+np && (temp = omap[index[i]-np])) { outjp[i] = column[temp-1]; } else { outjp[i] = ff; } } SHALLOW_DUPLICATE_ATTRIB(outjp, column); out[pos] = outjp; } else { if(names) nam[pos] = na[j]; out[pos] = column; } ++pos; } break; } case LGLSXP: case INTSXP: { IntegerVector column = x[j]; int ff = lfill ? NA_INTEGER : Rf_asInteger(fill); // as if(gss != column.size()) stop("length(x) must match length(g)"); for(int p = 0; p != ns; ++p) { int np = n[p]; if(absn[p] > ags) warning("lag-length exceeds average group-size (%i). This could also be a result of unused factor levels. See #25. Use fdroplevels() to remove unused factor levels from your data.", ags); if(np>0) { IntegerVector outjp = no_init_vector(gss); if(names) nam[pos] = "L" + nc[p] + "." + na[j]; for(int i = 0; i != gss; ++i) { if(ord2[i] >= np && (temp = omap[index[i]-np])) { outjp[i] = column[temp-1]; } else { outjp[i] = ff; } } SHALLOW_DUPLICATE_ATTRIB(outjp, column); if(txj == LGLSXP) SET_TYPEOF(outjp, LGLSXP); out[pos] = outjp; } else if(np<0) { IntegerVector outjp = no_init_vector(gss); if(names) nam[pos] = "F" + nc[p] + "." + na[j]; for(int i = 0; i != gss; ++i) { // best loop ?? if(ord2[i] < max[g[i]]+np && (temp = omap[index[i]-np])) { outjp[i] = column[temp-1]; } else { outjp[i] = ff; } } SHALLOW_DUPLICATE_ATTRIB(outjp, column); if(txj == LGLSXP) SET_TYPEOF(outjp, LGLSXP); out[pos] = outjp; } else { if(names) nam[pos] = na[j]; out[pos] = x[j]; } ++pos; } break; } case STRSXP: { CharacterVector column = x[j]; // String ff = lfill ? NA_STRING : as(fill); SEXP ff = lfill ? NA_STRING : Rf_asChar(fill); if(gss != column.size()) stop("length(x) must match length(g)"); for(int p = 0; p != ns; ++p) { int np = n[p]; if(absn[p] > ags) warning("lag-length exceeds average group-size (%i). This could also be a result of unused factor levels. See #25. Use fdroplevels() to remove unused factor levels from your data.", ags); if(np>0) { CharacterVector outjp = no_init_vector(gss); if(names) nam[pos] = "L" + nc[p] + "." + na[j]; for(int i = 0; i != gss; ++i) { if(ord2[i] >= np && (temp = omap[index[i]-np])) { outjp[i] = column[temp-1]; } else { outjp[i] = ff; } } SHALLOW_DUPLICATE_ATTRIB(outjp, column); out[pos] = outjp; } else if(np<0) { CharacterVector outjp = no_init_vector(gss); if(names) nam[pos] = "F" + nc[p] + "." + na[j]; for(int i = 0; i != gss; ++i) { // best loop ?? if(ord2[i] < max[g[i]]+np && (temp = omap[index[i]-np])) { outjp[i] = column[temp-1]; } else { outjp[i] = ff; } } SHALLOW_DUPLICATE_ATTRIB(outjp, column); out[pos] = outjp; } else { if(names) nam[pos] = na[j]; out[pos] = column; } ++pos; } break; } default: stop("Not supported SEXP type!"); } } } } SHALLOW_DUPLICATE_ATTRIB(out, x); if(names) { // best way to code this ?? Rf_namesgets(out, nam); } else { if(ns != 1) Rf_setAttrib(out, R_NamesSymbol, R_NilValue); } return out; } collapse/src/fsum.c0000644000176200001440000002231514172367040013770 0ustar liggesusers#include "collapse_c.h" // #include void fsum_double_impl(double *pout, double *px, int ng, int *pg, int narm, int l) { if(ng == 0) { double sum; if(narm) { int j = l-1; sum = px[j]; while(ISNAN(sum) && j!=0) sum = px[--j]; if(j != 0) for(int i = j; i--; ) { if(NISNAN(px[i])) sum += px[i]; // Fastest ? } } else { sum = 0; for(int i = 0; i != l; ++i) { if(ISNAN(px[i])) { sum = px[i]; break; } else { sum += px[i]; } } } pout[0] = sum; } else { if(narm) { for(int i = ng; i--; ) pout[i] = NA_REAL; // Other way ? --pout; for(int i = l; i--; ) { if(NISNAN(px[i])) { // faster way to code this ? -> Not Bad at all if(ISNAN(pout[pg[i]])) pout[pg[i]] = px[i]; else pout[pg[i]] += px[i]; } } } else { memset(pout, 0.0, sizeof(double) * ng); --pout; for(int i = l; i--; ) pout[pg[i]] += px[i]; // Used to stop loop when all groups passed with NA, but probably no speed gain since groups are mostly ordered. } } } void fsum_weights_impl(double *pout, double *px, int ng, int *pg, double *pw, int narm, int l) { if(ng == 0) { double sum; if(narm) { int j = l-1; while((ISNAN(px[j]) || ISNAN(pw[j])) && j!=0) --j; sum = px[j] * pw[j]; if(j != 0) for(int i = j; i--; ) { if(ISNAN(px[i]) || ISNAN(pw[i])) continue; sum += px[i] * pw[i]; } } else { sum = 0; for(int i = 0; i != l; ++i) { if(ISNAN(px[i]) || ISNAN(pw[i])) { sum = px[i] + pw[i]; break; } else { sum += px[i] * pw[i]; } } } pout[0] = sum; } else { if(narm) { for(int i = ng; i--; ) pout[i] = NA_REAL; // Other way ? --pout; for(int i = l; i--; ) { if(ISNAN(px[i]) || ISNAN(pw[i])) continue; if(ISNAN(pout[pg[i]])) pout[pg[i]] = px[i] * pw[i]; else pout[pg[i]] += px[i] * pw[i]; } } else { memset(pout, 0.0, sizeof(double) * ng); --pout; for(int i = l; i--; ) pout[pg[i]] += px[i] * pw[i]; // Used to stop loop when all groups passed with NA, but probably no speed gain since groups are mostly ordered. } } } // using long long internally is substantially faster than using doubles !! double fsum_int_impl(int *px, int narm, int l) { long long sum; if(narm) { int j = l-1; while(px[j] == NA_INTEGER && j!=0) --j; sum = (long long)px[j]; if(j == 0 && (l > 1 || px[j] == NA_INTEGER)) return NA_REAL; for(int i = j; i--; ) if(px[i] != NA_INTEGER) sum += (long long)px[i]; } else { sum = 0; for(int i = 0; i != l; ++i) { if(px[i] == NA_INTEGER) return NA_REAL; sum += (long long)px[i]; } } return (double)sum; } void fsum_int_g_impl(int *pout, int *px, int ng, int *pg, int narm, int l) { long long ckof; if(narm) { for(int i = ng; i--; ) pout[i] = NA_INTEGER; --pout; for(int i = l, lsi; i--; ) { if(px[i] != NA_INTEGER) { lsi = pout[pg[i]]; if(lsi == NA_INTEGER) pout[pg[i]] = px[i]; else { ckof = (long long)lsi + px[i]; if(ckof > INT_MAX || ckof <= INT_MIN) error("Integer overflow in one or more groups. Integers in R are bounded between 2,147,483,647 and -2,147,483,647. The sum within each group should be in that range."); pout[pg[i]] = (int)ckof; } } } } else { memset(pout, 0, sizeof(int) * ng); --pout; for(int i = l, lsi; i--; ) { if(px[i] == NA_INTEGER) { pout[pg[i]] = NA_INTEGER; continue; } lsi = pout[pg[i]]; if(lsi != NA_INTEGER) { // Used to stop loop when all groups passed with NA, but probably no speed gain since groups are mostly ordered. ckof = (long long)lsi + px[i]; if(ckof > INT_MAX || ckof <= INT_MIN) error("Integer overflow in one or more groups. Integers in R are bounded between 2,147,483,647 and -2,147,483,647. The sum within each group should be in that range."); pout[pg[i]] = (int)ckof; } } } } SEXP fsumC(SEXP x, SEXP Rng, SEXP g, SEXP w, SEXP Rnarm) { int l = length(x), tx = TYPEOF(x), ng = asInteger(Rng), narm = asLogical(Rnarm), nprotect = 1, nwl = isNull(w); // ALTREP methods for compact sequences: not safe yet and not part of the API. // if(ALTREP(x) && ng == 0 && nwl) { // switch(tx) { // case INTSXP: return ALTINTEGER_SUM(x, (Rboolean)narm); // case LGLSXP: return ALTLOGICAL_SUM(x, (Rboolean)narm); // case REALSXP: return ALTREAL_SUM(x, (Rboolean)narm); // default: error("ALTREP object must be integer or real typed"); // } // } if (l < 1) return x; // Prevents seqfault for numeric(0) #101 if(ng && l != length(g)) error("length(g) must match length(x)"); if(tx == LGLSXP) tx = INTSXP; SEXP out; if(!(ng == 0 && nwl && tx == INTSXP)) out = PROTECT(allocVector(nwl ? tx : REALSXP, ng == 0 ? 1 : ng)); if(nwl) { switch(tx) { case REALSXP: fsum_double_impl(REAL(out), REAL(x), ng, INTEGER(g), narm, l); break; case INTSXP: { if(ng > 0) fsum_int_g_impl(INTEGER(out), INTEGER(x), ng, INTEGER(g), narm, l); else { double sum = fsum_int_impl(INTEGER(x), narm, l); if(sum > INT_MAX || sum <= INT_MIN) return ScalarReal(sum); // INT_MIN is NA_INTEGER return ScalarInteger(ISNAN(sum) ? NA_INTEGER : (int)sum); } break; } default: error("Unsupported SEXP type"); } } else { if(l != length(w)) error("length(w) must match length(x)"); int tw = TYPEOF(w); SEXP xr, wr; double *px, *pw; if(tw != REALSXP) { if(tw != INTSXP && tw != LGLSXP) error("weigths must be double or integer"); wr = PROTECT(coerceVector(w, REALSXP)); pw = REAL(wr); ++nprotect; } else pw = REAL(w); if(tx != REALSXP) { if(tx != INTSXP) error("x must be double or integer"); xr = PROTECT(coerceVector(x, REALSXP)); px = REAL(xr); ++nprotect; } else px = REAL(x); fsum_weights_impl(REAL(out), px, ng, INTEGER(g), pw, narm, l); } if(ng && !isObject(x)) copyMostAttrib(x, out); UNPROTECT(nprotect); return out; } SEXP fsummC(SEXP x, SEXP Rng, SEXP g, SEXP w, SEXP Rnarm, SEXP Rdrop) { SEXP dim = getAttrib(x, R_DimSymbol); if(isNull(dim)) error("x is not a matrix"); int tx = TYPEOF(x), l = INTEGER(dim)[0], col = INTEGER(dim)[1], *pg = INTEGER(g), ng = asInteger(Rng), ng1 = ng == 0 ? 1 : ng, narm = asLogical(Rnarm), nprotect = 1, nwl = isNull(w); if (l < 1) return x; // Prevents seqfault for numeric(0) #101 if(ng && l != length(g)) error("length(g) must match nrow(x)"); if(tx == LGLSXP) tx = INTSXP; SEXP out = PROTECT(allocVector((nwl && ng > 0) ? tx : REALSXP, ng == 0 ? col : col * ng)); if(nwl) { switch(tx) { case REALSXP: { double *px = REAL(x), *pout = REAL(out); for(int j = 0; j != col; ++j) fsum_double_impl(pout + j*ng1, px + j*l, ng, pg, narm, l); break; } case INTSXP: { int *px = INTEGER(x); if(ng > 0) { int *pout = INTEGER(out); for(int j = 0; j != col; ++j) fsum_int_g_impl(pout + j*ng1, px + j*l, ng, pg, narm, l); } else { double *pout = REAL(out); int anyoutl = 0; for(int j = 0; j != col; ++j) { double sumj = fsum_int_impl(px + j*l, narm, l); if(sumj > INT_MAX || sumj <= INT_MIN) anyoutl = 1; pout[j] = sumj; } if(anyoutl == 0) { SEXP iout = PROTECT(coerceVector(out, INTSXP)); matCopyAttr(iout, x, Rdrop, ng); UNPROTECT(2); return iout; } } break; } default: error("Unsupported SEXP type"); } } else { if(l != length(w)) error("length(w) must match nrow(x)"); int tw = TYPEOF(w); SEXP xr, wr; double *px, *pw, *pout = REAL(out); if(tw != REALSXP) { if(tw != INTSXP && tw != LGLSXP) error("weigths must be double or integer"); wr = PROTECT(coerceVector(w, REALSXP)); pw = REAL(wr); ++nprotect; } else pw = REAL(w); if(tx != REALSXP) { if(tx != INTSXP) error("x must be double or integer"); xr = PROTECT(coerceVector(x, REALSXP)); px = REAL(xr); ++nprotect; } else px = REAL(x); for(int j = 0; j != col; ++j) fsum_weights_impl(pout + j*ng1, px + j*l, ng, pg, pw, narm, l); } matCopyAttr(out, x, Rdrop, ng); UNPROTECT(nprotect); return out; } SEXP fsumlC(SEXP x, SEXP Rng, SEXP g, SEXP w, SEXP Rnarm, SEXP Rdrop) { int l = length(x), ng = asInteger(Rng); if(l < 1) return x; // needed ?? if(ng == 0 && asLogical(Rdrop)) { SEXP out = PROTECT(allocVector(REALSXP, l)), *px = SEXPPTR(x); double *pout = REAL(out); for(int j = 0; j != l; ++j) pout[j] = asReal(fsumC(px[j], Rng, g, w, Rnarm)); setAttrib(out, R_NamesSymbol, getAttrib(x, R_NamesSymbol)); UNPROTECT(1); return out; } SEXP out = PROTECT(allocVector(VECSXP, l)), *pout = SEXPPTR(out), *px = SEXPPTR(x); for(int j = 0; j != l; ++j) pout[j] = fsumC(px[j], Rng, g, w, Rnarm); if(ng == 0) for(int j = 0; j != l; ++j) copyMostAttrib(px[j], pout[j]); DFcopyAttr(out, x, ng); UNPROTECT(1); return out; } collapse/src/base_radixsort.h0000644000176200001440000000157414062453207016037 0ustar liggesusers // #include // Not available in C API !! // #include // Not available in C API !! #include #include #include // typedef uint64_t ZPOS64_T; // already defined in stdint.h #define IS_ASCII(x) (LEVELS(x) & 64) // from data.table.h // #define ASCII_MASK (1<<6) // evaluates to 64 !! // # define IS_ASCII(x) ((x)->sxpinfo.gp & ASCII_MASK) // #define IS_ASCII(x) (LEVELS(x) & ASCII_MASK) /* It would be better to find a way to avoid abusing TRUELENGTH, but in the meantime replace TRUELENGTH/SET_TRUELENGTH with TRLEN/SET_TRLEN that cast to int to avoid warnings. */ #define TRLEN(x) ((int) TRUELENGTH(x)) #define SET_TRLEN(x, v) SET_TRUELENGTH(x, ((int) (v))) SEXP Cradixsort(SEXP NA_last, SEXP decreasing, SEXP RETstrt, SEXP RETgs, SEXP SORTStr, SEXP args); void Cdoubleradixsort(int *o, Rboolean NA_last, Rboolean decreasing, SEXP x); collapse/src/psmat.cpp0000644000176200001440000001203313740117252014474 0ustar liggesusers// [[Rcpp::plugins(cpp11)]] #include using namespace Rcpp; template Matrix psmatCppImpl(Vector x, IntegerVector g, SEXP t, bool transpose) { int l = x.size(), gss = g.size(); if(gss != l) stop("length(g) must match length(x)"); CharacterVector glevs = Rf_getAttrib(g, R_LevelsSymbol); int *pg = INTEGER(g); int ng = glevs.size(), gs = l/ng, ngp = ng+1; if(Rf_isNull(t)) { if(l%ng != 0) stop("length(x) must be a multiple of length(levels(g))"); std::vector seen(ngp); Matrix out = transpose ? no_init_matrix(gs, ng) : no_init_matrix(ng, gs); if(transpose) { for(int i = 0; i != l; ++i) { if(seen[pg[i]] == gs) stop("Panel not Balanced: Need to supply timevar"); out(seen[pg[i]]++, pg[i]-1) = x[i]; // out[(g[i]-1)*gs + seen[g[i]]++] = x[i]; not really faster... } } else { for(int i = 0; i != l; ++i) { if(seen[pg[i]] == gs) stop("Panel not Balanced: Need to supply timevar"); out(pg[i]-1, seen[pg[i]]++) = x[i]; // out[(seen[g[i]]++)*ng + g[i]-1] = x[i]; not really faster... } } Rf_dimnamesgets(out, transpose ? List::create(seq_len(gs), glevs) : List::create(glevs, seq_len(gs))); Rf_setAttrib(out, Rf_install("transpose"), Rf_ScalarLogical(transpose)); Rf_classgets(out, CharacterVector::create("psmat", "matrix")); return out; } else { int *pt = INTEGER(t); if(l != Rf_length(t)) stop("length(t) must match length(x)"); // int maxt = max(t); // needed ? // check whether t.levels is same size as maxt ? CharacterVector tlevs = Rf_getAttrib(t, R_LevelsSymbol); int nt = tlevs.size(); Matrix out = transpose ? no_init_matrix(nt, ng) : no_init_matrix(ng, nt); // best way to do this ? Stable ? -> Could conditionally create vector and the coerce to matrix -> faster init ? if(nt != gs) std::fill(out.begin(), out.end(), Vector::get_na()); if(transpose) { for(int i = 0; i != l; ++i) out[(pg[i]-1)*nt + pt[i]-1] = x[i]; // out(tt[i]-1, g[i]-1) = x[i]; // tiny bit faster } else { for(int i = 0; i != l; ++i) out[(pt[i]-1)*ng + pg[i]-1] = x[i]; // out(g[i]-1, tt[i]-1) = x[i]; // tiny bit faster } Rf_dimnamesgets(out, transpose ? List::create(tlevs, glevs) : List::create(glevs, tlevs)); Rf_setAttrib(out, Rf_install("transpose"), Rf_ScalarLogical(transpose)); Rf_classgets(out, CharacterVector::create("psmat", "matrix")); return out; } } template <> Matrix psmatCppImpl(Vector x, IntegerVector g, SEXP t, bool transpose) { stop("Not supported SEXP type!"); } template <> Matrix psmatCppImpl(Vector x, IntegerVector g, SEXP t, bool transpose) { stop("Not supported SEXP type!"); } template <> Matrix psmatCppImpl(Vector x, IntegerVector g, SEXP t, bool transpose) { stop("Not supported SEXP type!"); } // [[Rcpp::export]] SEXP psmatCpp(const SEXP& x, const IntegerVector& g, const SEXP& t = R_NilValue, bool transpose = false) { RCPP_RETURN_VECTOR(psmatCppImpl, x, g, t, transpose); } // Only Numeric Version: // // [[Rcpp::export]] // SEXP psmatCpp(NumericVector x, IntegerVector g, SEXP t = R_NilValue, bool transpose = false) { // int l = x.size(), gss = g.size(); // if(gss != l) stop("length(g) must match length(x)"); // CharacterVector glevs = g.attr("levels"); // int ng = glevs.size(), gs = l/ng, ngp = ng+1; // if(Rf_isNull(t)) { // if(l%ng != 0) stop("length(x) must be a multiple of length(levels(g))"); // IntegerVector seen(ngp); // NumericMatrix out = transpose ? no_init_matrix(gs, ng) : no_init_matrix(ng, gs); // if(transpose) { // for(int i = 0; i != l; ++i) { // if(seen[g[i]] == gs) stop("Panel not Balanced: Need to supply timevar"); // out(seen[g[i]]++, g[i]-1) = x[i]; // } // } else { // for(int i = 0; i != l; ++i) { // if(seen[g[i]] == gs) stop("Panel not Balanced: Need to supply timevar"); // out(g[i]-1, seen[g[i]]++) = x[i]; // } // } // out.attr("dimnames") = transpose ? List::create(seq_len(gs), glevs) : List::create(glevs, seq_len(gs)); // return out; // } else { // IntegerVector tt = t; // if(l != tt.size()) stop("length(t) must match length(x)"); // // int maxt = max(tt); // needed ?? // check whether t.levels is same size as maxt ?? // CharacterVector tlevs = tt.attr("levels"); // int nt = tlevs.size(); // NumericMatrix out = transpose ? no_init_matrix(nt, ng) : no_init_matrix(ng, nt); // best way to do this ?? Stable ?? -> Could conditionally create vector and the coerce to matrix -> faster init ?? // if(nt != gs) std::fill(out.begin(), out.end(), NA_REAL); // memset(out, NA_REAL, sizeof(double)*ng*maxt); -> unstable !! // else balanced panel !! // if(transpose) { // for(int i = 0; i != l; ++i) out(tt[i]-1, g[i]-1) = x[i]; // } else { // for(int i = 0; i != l; ++i) out(g[i]-1, tt[i]-1) = x[i]; // } // out.attr("dimnames") = transpose ? List::create(tlevs, glevs) : List::create(glevs, tlevs); // return out; // } // } collapse/src/handle_attributes.c0000644000176200001440000001151414201327077016515 0ustar liggesusers#include #include // See https://github.com/wch/r-source/blob/079f863446b5414dd96f3c29d519e4a654146364/src/main/memory.c // and https://github.com/wch/r-source/blob/80e410a786324e0e472a25481d5dd28db8285330/src/main/attrib.c // https://github.com/wch/r-source/blob/b6f046826c87fc10ad08acd8858921fa1a58e488/doc/manual/R-ints.texi SEXP setAttributes(SEXP x, SEXP a) { SET_ATTRIB(x, Rf_coerceVector(a, LISTSXP)); Rf_classgets(x, Rf_getAttrib(x, R_ClassSymbol)); // forcing class after attribute copy !! return x; } void setattributes(SEXP x, SEXP a) { SET_ATTRIB(x, Rf_coerceVector(a, LISTSXP)); // SET_OBJECT(x, TYPEOF(x)); // if(OBJECT(a)) // This does not work with ts-matrices! could also make compatible with S4 objects ! Rf_classgets(x, Rf_getAttrib(x, R_ClassSymbol)); } // not used ! // SEXP setAttr(SEXP x, SEXP a, SEXP v) { // Rf_setAttrib(x, a, v); // return x; // } // void setattr(SEXP x, SEXP a, SEXP v) { // Rf_setAttrib(x, a, v); // } SEXP duplAttributes(SEXP x, SEXP y) { // also look at data.table's keepattributes ... SHALLOW_DUPLICATE_ATTRIB(x, y); // SET_ATTRIB(x, ATTRIB(y)); return x; } // R_duplicate_attr -> deep copy only of attributes -> expensive if attributes are large ! // Rf_lazy_duplicate -> duplicate on modify -> but modifies object in global environment ! // Rf_shallow_duplicate -> only duplicate pointer? -> best !! // No speed improvement to attr<- (same slow performance for data.frame 'row.names') // SEXP CsetAttr(SEXP object, SEXP a, SEXP v) { // SEXP res = Rf_shallow_duplicate(object); // Rf_setAttrib(res, a, v); // return res; // } // Attribute Handling - 4 Situations: // 1 - x is classed (factor, date, time series), xAG is not classed. i.e. vector of fnobs, fmean etc. // -> Sallow replacing, removing class and levels attributes from x, discard attributes of xAG (if any) // -> or (if type matches i.e. double for date or time series), copy attributes of x unless x is a factor // 2 - x is not classed, xAG is classed (factor, date, time series). - an unusual situation should not occurr - copy attributes of xAG, discard attributes of x // 3 - xAG and x are classed - same as above, keep attributes of xAG, discard attributes of x // 4 - neither x nor xAG are classed - preserve attributes of x, discard attributes of xAG (if any) // // if(Rf_isObject(xAG)) SHALLOW_DUPLICATE_ATTRIB(out, xAG); // else if(!Rf_isObject(x) || (tx == txAG && !Rf_isFactor(x))) SHALLOW_DUPLICATE_ATTRIB(out, x); // else { // SHALLOW_DUPLICATE_ATTRIB(out, x); // Rf_classgets(out, R_NilValue); // OK ! // Rf_setAttrib(out, R_LevelsSymbol, R_NilValue); // if(Rf_isFactor(x)) ? faster ? // } // Can think further about this! but this solution appears acceptable... SEXP copyMostAttributes(SEXP x, SEXP y) { int tx = TYPEOF(x); // -> This is about the best we can do: unlist() does not preserve dates, and we don't want to create malformed factors if(tx == TYPEOF(y) && (tx != INTSXP || OBJECT(x) == OBJECT(y))) { Rf_copyMostAttrib(y, x); return x; } // In any case we can preserve variable labels.. SEXP sym_label = PROTECT(install("label")); SEXP lab = Rf_getAttrib(y, sym_label); if(TYPEOF(lab) != NILSXP) Rf_setAttrib(x, sym_label, lab); UNPROTECT(1); return x; } SEXP CsetAttrib(SEXP object, SEXP a) { int il = TYPEOF(object) == VECSXP; SEXP res = il ? PROTECT(Rf_shallow_duplicate(object)) : object; // needed, otherwise error !! SET_ATTRIB(res, PROTECT(Rf_coerceVector(a, LISTSXP))); Rf_classgets(res, Rf_getAttrib(res, R_ClassSymbol)); UNPROTECT(il+1); return res; } SEXP CcopyAttrib(SEXP to, SEXP from) { int il = TYPEOF(to) == VECSXP; SEXP res = il ? PROTECT(Rf_shallow_duplicate(to)) : to; SHALLOW_DUPLICATE_ATTRIB(res, from); UNPROTECT(il); return res; } SEXP CcopyMostAttrib(SEXP to, SEXP from) { int il = TYPEOF(to) == VECSXP; SEXP res = il ? PROTECT(Rf_shallow_duplicate(to)) : to; Rf_copyMostAttrib(from, res); if(il && isFrame(from) && length(VECTOR_ELT(to, 0)) != length(VECTOR_ELT(from, 0))) { Rf_setAttrib(res, R_RowNamesSymbol, Rf_getAttrib(to, R_RowNamesSymbol)); } UNPROTECT(il); return res; } // No longer needed... // Warning message: In .Call(C_duplattributes, x, y) : converting NULL pointer to R NULL // void duplattributes(SEXP x, SEXP y) { // SHALLOW_DUPLICATE_ATTRIB(x, y); // SET_ATTRIB(x, ATTRIB(y)); // Rf_classgets(x, Rf_getAttrib(y, R_ClassSymbol)); // This solves the warning message !! // just to return R_NilValue; and the SEXP... retrns NULL anyway // } // No longer needed... using copyMostAttributes instead // SEXP cond_duplAttributes(SEXP x, SEXP y) { // if(TYPEOF(x) == TYPEOF(y)) SHALLOW_DUPLICATE_ATTRIB(x, y); // SET_ATTRIB(x, ATTRIB(y)); // return x; // } // not used !! // void cond_duplattributes(SEXP x, SEXP y) { // if(TYPEOF(x) == TYPEOF(y)) SHALLOW_DUPLICATE_ATTRIB(x, y); // SET_ATTRIB(x, ATTRIB(y)); // } collapse/src/fnobs.c0000644000176200001440000001237214062402214014116 0ustar liggesusers#include "collapse_c.h" SEXP fnobsC(SEXP x, SEXP Rng, SEXP g) { int l = length(x), ng = asInteger(Rng); if (ng == 0) { int n = 0; switch(TYPEOF(x)) { case REALSXP: { double *px = REAL(x); for(int i = 0; i != l; ++i) if(px[i] == px[i]) ++n; break; } case INTSXP: case LGLSXP: { int *px = INTEGER(x); for(int i = 0; i != l; ++i) if(px[i] != NA_INTEGER) ++n; break; } case STRSXP: { SEXP *px = STRING_PTR(x); for(int i = 0; i != l; ++i) if(px[i] != NA_STRING) ++n; break; } case VECSXP: { SEXP *px = SEXPPTR(x); for(int i = 0; i != l; ++i) if(length(px[i])) ++n; break; } default: error("Unsupported SEXP type"); } return ScalarInteger(n); } else { // with groups if(length(g) != l) error("length(g) must match NROW(X)"); SEXP n = PROTECT(allocVector(INTSXP, ng)); int *pn = INTEGER(n), *pg = INTEGER(g); memset(pn, 0, sizeof(int) * ng); --pn; switch(TYPEOF(x)) { case REALSXP: { double *px = REAL(x); for(int i = 0; i != l; ++i) if(px[i] == px[i]) ++pn[pg[i]]; break; } case INTSXP: case LGLSXP: { int *px = INTEGER(x); for(int i = 0; i != l; ++i) if(px[i] != NA_INTEGER) ++pn[pg[i]]; break; } case STRSXP: { SEXP *px = STRING_PTR(x); for(int i = 0; i != l; ++i) if(px[i] != NA_STRING) ++pn[pg[i]]; break; } case VECSXP: { SEXP *px = SEXPPTR(x); for(int i = 0; i != l; ++i) if(length(px[i])) ++pn[pg[i]]; break; } default: error("Unsupported SEXP type"); } if(!isObject(x)) { copyMostAttrib(x, n); // SHALLOW_DUPLICATE_ATTRIB(n, x); } else { SEXP sym_label = PROTECT(install("label")); // PROTECT ?? setAttrib(n, sym_label, getAttrib(x, sym_label)); UNPROTECT(1); } UNPROTECT(1); return n; } } SEXP fnobsmC(SEXP x, SEXP Rng, SEXP g, SEXP Rdrop) { SEXP dim = getAttrib(x, R_DimSymbol); // protect ?? if(isNull(dim)) error("x is not a matrix"); int ng = asInteger(Rng), l = INTEGER(dim)[0], col = INTEGER(dim)[1]; SEXP n = PROTECT(allocVector(INTSXP, ng == 0 ? col : ng * col)); int *pn = INTEGER(n); if (ng == 0) { switch(TYPEOF(x)) { case REALSXP: { double *px = REAL(x); for(int j = 0; j != col; ++j) { int nj = 0, end = l * j + l; for(int i = l * j; i != end; ++i) if(NISNAN(px[i])) ++nj; pn[j] = nj; } break; } case INTSXP: case LGLSXP: { int *px = INTEGER(x); for(int j = 0; j != col; ++j) { int nj = 0, end = l * j + l; for(int i = l * j; i != end; ++i) if(px[i] != NA_INTEGER) ++nj; pn[j] = nj; } break; } case STRSXP: { SEXP *px = STRING_PTR(x); for(int j = 0; j != col; ++j) { int nj = 0, end = l * j + l; for(int i = l * j; i != end; ++i) if(px[i] != NA_STRING) ++nj; pn[j] = nj; } break; } case VECSXP: { SEXP *px = SEXPPTR(x); for(int j = 0; j != col; ++j) { int nj = 0, end = l * j + l; for(int i = l * j; i != end; ++i) if(length(px[i])) ++nj; pn[j] = nj; } break; } default: error("Unsupported SEXP type"); } } else { // with groups if(length(g) != l) error("length(g) must match NROW(X)"); memset(pn, 0, sizeof(int) * ng * col); pn -= ng + 1; int *pg = INTEGER(g); switch(TYPEOF(x)) { case REALSXP: { double *px = REAL(x)-l; for(int j = 0; j != col; ++j) { pn += ng; px += l; for(int i = 0; i != l; ++i) if(NISNAN(px[i])) ++pn[pg[i]]; } break; } case INTSXP: case LGLSXP: { int *px = INTEGER(x)-l; for(int j = 0; j != col; ++j) { pn += ng; px += l; for(int i = 0; i != l; ++i) if(px[i] != NA_INTEGER) ++pn[pg[i]]; } break; } case STRSXP: { SEXP *px = STRING_PTR(x)-l; for(int j = 0; j != col; ++j) { pn += ng; px += l; for(int i = 0; i != l; ++i) if(px[i] != NA_STRING) ++pn[pg[i]]; } break; } case VECSXP: { SEXP *px = SEXPPTR(x)-l; for(int j = 0; j != col; ++j) { pn += ng; px += l; for(int i = 0; i != l; ++i) if(length(px[i])) ++pn[pg[i]]; } break; } default: error("Unsupported SEXP type"); } } matCopyAttr(n, x, Rdrop, ng); UNPROTECT(1); return n; } SEXP fnobslC(SEXP x, SEXP Rng, SEXP g, SEXP Rdrop) { int l = length(x), ng = asInteger(Rng); if(l < 1) return x; if(asLogical(Rdrop) && ng == 0) { SEXP out = PROTECT(allocVector(INTSXP, l)), *px = SEXPPTR(x); int *pout = INTEGER(out); for(int j = 0; j != l; ++j) pout[j] = INTEGER(fnobsC(px[j], Rng, g))[0]; setAttrib(out, R_NamesSymbol, getAttrib(x, R_NamesSymbol)); UNPROTECT(1); return out; } else { SEXP out = PROTECT(allocVector(VECSXP, l)), *pout = SEXPPTR(out), *px = SEXPPTR(x); for(int j = 0; j != l; ++j) pout[j] = fnobsC(px[j], Rng, g); DFcopyAttr(out, x, ng); UNPROTECT(1); return out; } } collapse/src/flast.c0000644000176200001440000002416314071370627014135 0ustar liggesusers#include "collapse_c.h" SEXP flast_impl(SEXP x, int ng, SEXP g, int narm, int *gl) { int l = length(x), tx = TYPEOF(x); if (l < 2) return x; // Prevents seqfault for numeric(0) #101 if (ng == 0) { SEXP out = PROTECT(allocVector(tx, 1)); int j = l-1; if(narm) { switch(tx) { case REALSXP: { double *px = REAL(x); while(ISNAN(px[j]) && j != 0) --j; REAL(out)[0] = px[j]; break; } case STRSXP: { SEXP *px = STRING_PTR(x); while(px[j] == NA_STRING && j != 0) --j; SET_STRING_ELT(out, 0, px[j]); break; } case INTSXP: case LGLSXP: { int *px = INTEGER(x); while(px[j] == NA_INTEGER && j != 0) --j; INTEGER(out)[0] = px[j]; break; } case VECSXP: { SEXP *px = SEXPPTR(x); while(length(px[j]) == 0 && j != 0) --j; SET_VECTOR_ELT(out, 0, px[j]); break; } default: error("Unsupported SEXP type!"); } } else { switch(tx) { case REALSXP: REAL(out)[0] = REAL(x)[l-1]; break; case STRSXP: SET_STRING_ELT(out, 0, STRING_ELT(x, l-1)); break; case INTSXP: case LGLSXP: INTEGER(out)[0] = INTEGER(x)[l-1]; break; case VECSXP: SET_VECTOR_ELT(out, 0, VECTOR_ELT(x, l-1)); break; default: error("Unsupported SEXP type!"); } } copyMostAttrib(x, out); if(!isNull(getAttrib(x, R_NamesSymbol))) namesgets(out, ScalarString(STRING_ELT(getAttrib(x, R_NamesSymbol), j))); UNPROTECT(1); return out; } else { // with groups if(length(g) != l) error("length(g) must match nrow(X)"); SEXP out = PROTECT(allocVector(tx, ng)); if(narm) { int ngs = 0, *pg = INTEGER(g); switch(tx) { case REALSXP: { double *px = REAL(x), *pout = REAL(out); for(int i = ng; i--; ) pout[i] = NA_REAL; --pout; for(int i = l; i--; ) { if(NISNAN(px[i])) { if(ISNAN(pout[pg[i]])) { pout[pg[i]] = px[i]; if(++ngs == ng) break; } } } break; } case STRSXP: { SEXP *px = STRING_PTR(x), *pout = STRING_PTR(out); for(int i = ng; i--; ) pout[i] = NA_STRING; --pout; for(int i = l; i--; ) { if(px[i] != NA_STRING) { if(pout[pg[i]] == NA_STRING) { pout[pg[i]] = px[i]; if(++ngs == ng) break; } } } break; } case INTSXP: case LGLSXP: { int *px = INTEGER(x), *pout = INTEGER(out); for(int i = ng; i--; ) pout[i] = NA_INTEGER; --pout; for(int i = l; i--; ) { if(px[i] != NA_INTEGER) { if(pout[pg[i]] == NA_INTEGER) { pout[pg[i]] = px[i]; if(++ngs == ng) break; } } } break; } case VECSXP: { SEXP *px = SEXPPTR(x), *pout = SEXPPTR(out); for(int i = ng; i--; ) pout[i] = R_NilValue; --pout; for(int i = l; i--; ) { if(length(px[i])) { if(pout[pg[i]] == R_NilValue) { pout[pg[i]] = px[i]; if(++ngs == ng) break; } } } break; } default: error("Unsupported SEXP type!"); } } else { switch(tx) { case REALSXP: { double *px = REAL(x), *pout = REAL(out); for(int i = ng; i--; ) pout[i] = px[gl[i]]; break; } case INTSXP: case LGLSXP: { int *px = INTEGER(x), *pout = INTEGER(out); for(int i = ng; i--; ) pout[i] = px[gl[i]]; break; } case STRSXP: case VECSXP: { SEXP *px = SEXPPTR(x), *pout = SEXPPTR(out); for(int i = ng; i--; ) pout[i] = px[gl[i]]; break; } default: error("Unsupported SEXP type!"); } } copyMostAttrib(x, out); UNPROTECT(1); return out; } } SEXP flastC(SEXP x, SEXP Rng, SEXP g, SEXP Rnarm) { int *pgl, ng = asInteger(Rng), narm = asLogical(Rnarm); if(ng == 0 || narm) { pgl = &ng; return flast_impl(x, ng, g, narm, pgl); } SEXP gl = PROTECT(allocVector(INTSXP, ng)); int *pg = INTEGER(g); pgl = INTEGER(gl); for(int i = ng; i--; ) pgl[i] = NA_INTEGER; --pgl; for(int i = length(g); i--; ) if(pgl[pg[i]] == NA_INTEGER) pgl[pg[i]] = i; SEXP res = flast_impl(x, ng, g, narm, ++pgl); UNPROTECT(1); return res; } SEXP flastlC(SEXP x, SEXP Rng, SEXP g, SEXP Rnarm) { int l = length(x), *pgl, ng = asInteger(Rng), narm = asLogical(Rnarm), nprotect = 1; if(ng > 0 && !narm) { SEXP gl = PROTECT(allocVector(INTSXP, ng)); ++nprotect; int *pg = INTEGER(g); pgl = INTEGER(gl); for(int i = ng; i--; ) pgl[i] = NA_INTEGER; --pgl; for(int i = length(g); i--; ) if(pgl[pg[i]] == NA_INTEGER) pgl[pg[i]] = i; ++pgl; } else pgl = &l; SEXP out = PROTECT(allocVector(VECSXP, l)); SEXP *px = SEXPPTR(x), *pout = SEXPPTR(out); for(int j = 0; j != l; ++j) pout[j] = flast_impl(px[j], ng, g, narm, pgl); DFcopyAttr(out, x, ng); UNPROTECT(nprotect); return out; } // For matrix writing a separate function to increase efficiency. SEXP flastmC(SEXP x, SEXP Rng, SEXP g, SEXP Rnarm, SEXP Rdrop) { SEXP dim = getAttrib(x, R_DimSymbol); if(isNull(dim)) error("x is not a matrix"); int tx = TYPEOF(x), ng = asInteger(Rng), narm = asLogical(Rnarm), l = INTEGER(dim)[0], col = INTEGER(dim)[1]; if (l < 2) return x; if (ng == 0) { SEXP out = PROTECT(allocVector(tx, col)); if(narm) { switch(tx) { case REALSXP: { double *px = REAL(x), *pout = REAL(out); for(int j = 0, i = l-1; j != col; ++j) { while(ISNAN(px[i]) && i != 0) --i; pout[j] = px[i]; px += l; i = l-1; } break; } case STRSXP: { SEXP *px = STRING_PTR(x), *pout = STRING_PTR(out); for(int j = 0, i = l-1; j != col; ++j) { while(px[i] == NA_STRING && i != 0) --i; pout[j] = px[i]; px += l; i = l-1; } break; } case INTSXP: case LGLSXP: { int *px = INTEGER(x), *pout = INTEGER(out); for(int j = 0, i = l-1; j != col; ++j) { while(px[i] == NA_INTEGER && i != 0) --i; pout[j] = px[i]; px += l; i = l-1; } break; } case VECSXP: { SEXP *px = STRING_PTR(x), *pout = STRING_PTR(out); for(int j = 0, i = l-1; j != col; ++j) { while(length(px[i]) == 0 && i != 0) --i; pout[j] = px[i]; px += l; i = l-1; } break; } default: error("Unsupported SEXP type!"); } } else { switch(tx) { case REALSXP: { double *px = REAL(x), *pout = REAL(out); for(int j = 0; j != col; ++j) pout[j] = px[j * l + l-1]; break; } case INTSXP: case LGLSXP: { int *px = INTEGER(x), *pout = INTEGER(out); for(int j = 0; j != col; ++j) pout[j] = px[j * l + l-1]; break; } case STRSXP: case VECSXP: { SEXP *px = SEXPPTR(x), *pout = SEXPPTR(out); for(int j = 0; j != col; ++j) pout[j] = px[j * l + l-1]; break; } default: error("Unsupported SEXP type!"); } } matCopyAttr(out, x, Rdrop, ng); UNPROTECT(1); return out; } else { // with groups if(length(g) != l) error("length(g) must match nrow(X)"); SEXP out = PROTECT(allocVector(tx, ng * col)); int *pg = INTEGER(g); if(narm) { switch(tx) { case REALSXP: { double *px = REAL(x), *pout = REAL(out); for(int i = ng * col; i--; ) pout[i] = NA_REAL; --pout; for(int j = 0; j != col; ++j) { for(int i = l; i--; ) if(NISNAN(px[i]) && ISNAN(pout[pg[i]])) pout[pg[i]] = px[i]; px += l; pout += ng; } break; } case STRSXP: { SEXP *px = STRING_PTR(x), *pout = STRING_PTR(out); for(int i = ng * col; i--; ) pout[i] = NA_STRING; --pout; for(int j = 0; j != col; ++j) { for(int i = l; i--; ) if(px[i] != NA_STRING && pout[pg[i]] == NA_STRING) pout[pg[i]] = px[i]; px += l; pout += ng; } break; } case INTSXP: case LGLSXP: { int *px = INTEGER(x), *pout = INTEGER(out); for(int i = ng * col; i--; ) pout[i] = NA_INTEGER; --pout; for(int j = 0; j != col; ++j) { for(int i = l; i--; ) if(px[i] != NA_INTEGER && pout[pg[i]] == NA_INTEGER) pout[pg[i]] = px[i]; px += l; pout += ng; } break; } case VECSXP: { SEXP *px = SEXPPTR(x), *pout = SEXPPTR(out); for(int i = ng * col; i--; ) pout[i] = R_NilValue; --pout; for(int j = 0; j != col; ++j) { for(int i = l; i--; ) if(length(px[i]) && pout[pg[i]] != R_NilValue) pout[pg[i]] = px[i]; px += l; pout += ng; } break; } default: error("Unsupported SEXP type!"); } } else { SEXP gl = PROTECT(allocVector(INTSXP, ng)); int *pgl = INTEGER(gl); for(int i = ng; i--; ) pgl[i] = NA_INTEGER; --pgl; for(int i = l; i--; ) if(pgl[pg[i]] == NA_INTEGER) pgl[pg[i]] = i; ++pgl; switch(tx) { case REALSXP: { double *px = REAL(x), *pout = REAL(out); for(int j = 0; j != col; ++j) { for(int i = ng; i--; ) pout[i] = px[pgl[i]]; px += l; pout += ng; } break; } case INTSXP: case LGLSXP: { int *px = INTEGER(x), *pout = INTEGER(out); for(int j = 0; j != col; ++j) { for(int i = ng; i--; ) pout[i] = px[pgl[i]]; px += l; pout += ng; } break; } case STRSXP: case VECSXP: { SEXP *px = SEXPPTR(x), *pout = SEXPPTR(out); for(int j = 0; j != col; ++j) { for(int i = ng; i--; ) pout[i] = px[pgl[i]]; px += l; pout += ng; } break; } default: error("Unsupported SEXP type!"); } UNPROTECT(1); } matCopyAttr(out, x, Rdrop, ng); UNPROTECT(1); return out; } } collapse/src/data.table_rbindlist.c0000644000176200001440000016102614107647322017074 0ustar liggesusers/* This code is adapted from the data.table package: http://r-datatable.com and licensed under a Mozilla Public License 2.0 (MPL-2.0) license. */ #include "data.table.h" #include #include // for isdigit #include // from assign.c. needed ? - yes for functions like R_FINITE // #include // Above code pasted from data.table_assign.c -> needed for rbindlist... static bool anyNamed(SEXP x) { if (MAYBE_REFERENCED(x)) return true; if (isNewList(x)) { // fixed gcc10 issue through better indentation for (int i=0; i1 && slen!=len && (!isNewList(target) || isNewList(source))) error("Internal error: recycle length error not caught earlier. slen=%d len=%d", slen, len); // # nocov // Internal error because the column has already been added to the DT, so length mismatch should have been caught before adding the column. // for 5647 this used to limit slen to len, but no longer if (colname==NULL) error("Internal error: memrecycle has received NULL colname"); // # nocov *memrecycle_message = '\0'; int protecti=0; if (isNewList(source)) { // A list() column; i.e. target is a column of pointers to SEXPs rather than the much more common case // where memrecycle copies the DATAPTR data to the atomic target from the atomic source. // If any item within the list is NAMED then take a fresh copy. So far this has occurred from dogroups.c when // j returns .BY or similar specials as-is within a list(). Those specials are static inside // dogroups so if we don't copy now the last value written to them by dogroups becomes repeated in the result; // i.e. the wrong result. // If source is itself recycled later (many list() column items pointing to the same object) we are ok with that // since we now have a fresh copy and := will not assign with a list() column's cell value; := only changes the // SEXP pointed to. // If source is already not named (because j already created a fresh unnamed vector within a list()) we don't want to // duplicate unnecessarily, hence checking for named rather than duplicating always. // See #481, #1270 and tests 1341.* fail without this copy. if (anyNamed(source)) { source = PROTECT(copyAsPlain(source)); protecti++; } } const bool sourceIsFactor=isFactor(source), targetIsFactor=isFactor(target); const bool sourceIsI64=isReal(source) && INHERITS(source, char_integer64); const bool targetIsI64=isReal(target) && INHERITS(target, char_integer64); if (sourceIsFactor || targetIsFactor) { if (!targetIsFactor) { if (!isString(target) && !isNewList(target)) error("Cannot assign 'factor' to '%s'. Factors can only be assigned to factor, character or list columns.", type2char(TYPEOF(target))); // else assigning factor to character is left to later below, avoiding wasteful asCharacterFactor } else if (!sourceIsFactor && !isString(source)) { // target is factor if (allNA(source, false)) { // return false for list and other types that allNA does not support source = ScalarLogical(NA_LOGICAL); // a global constant in R and won't allocate; fall through to regular zero-copy coerce } else if (isInteger(source) || isReal(source)) { // allow assigning level numbers to factor columns; test 425, 426, 429 and 1945 const int nlevel = length(getAttrib(target, R_LevelsSymbol)); if (isInteger(source)) { const int *sd = INTEGER(source); for (int i=0; inlevel) { error("Assigning factor numbers to column %d named '%s'. But %d is outside the level range [1,%d]", colnum, colname, val, nlevel); } } } else { const double *sd = REAL(source); for (int i=0; inlevel)) { error("Assigning factor numbers to column %d named '%s'. But %f is outside the level range [1,%d], or is not a whole number.", colnum, colname, val, nlevel); } } } // Now just let the valid level numbers fall through to regular assign by BODY below } else { error("Cannot assign '%s' to 'factor'. Factor columns can be assigned factor, character, NA in any type, or level numbers.", type2char(TYPEOF(source))); } } else { // either factor or character being assigned to factor column SEXP targetLevels = PROTECT(getAttrib(target, R_LevelsSymbol)); protecti++; SEXP sourceLevels = source; // character source if (sourceIsFactor) { sourceLevels=PROTECT(getAttrib(source, R_LevelsSymbol)); protecti++; } if (!sourceIsFactor || !R_compute_identical(sourceLevels, targetLevels, 0)) { // !sourceIsFactor for test 2115.6 const int nTargetLevels=length(targetLevels), nSourceLevels=length(sourceLevels); const SEXP *targetLevelsD=STRING_PTR(targetLevels), *sourceLevelsD=STRING_PTR(sourceLevels); SEXP newSource = PROTECT(allocVector(INTSXP, length(source))); protecti++; savetl_init(); for (int k=0; k0) { savetl(s); } else if (tl<0) { // # nocov start for (int j=0; j=0) { if (!sourceIsFactor && s==NA_STRING) continue; // don't create NA factor level when assigning character to factor; test 2117 if (tl>0) savetl(s); SET_TRUELENGTH(s, -nTargetLevels-(++nAdd)); } // else, when sourceIsString, it's normal for there to be duplicates here } const int nSource = length(source); int *newSourceD = INTEGER(newSource); if (sourceIsFactor) { const int *sourceD = INTEGER(source); for (int i=0; i0 && n>0 && n255, "d", "taken as 0") case REALSXP: if (sourceIsI64) CHECK_RANGE(int64_t, REAL, val<0 || val>255, PRId64, "taken as 0") else CHECK_RANGE(double, REAL, !R_FINITE(val) || val<0.0 || val>256.0 || (int)val!=val, "f", "either truncated (precision lost) or taken as 0") } break; case INTSXP: if (TYPEOF(source)==REALSXP) { if (sourceIsI64) CHECK_RANGE(int64_t, REAL, val!=NA_INTEGER64 && (val<=NA_INTEGER || val>INT_MAX), PRId64, "out-of-range (NA)") else CHECK_RANGE(double, REAL, !ISNAN(val) && (!R_FINITE(val) || (int)val!=val), "f", "truncated (precision lost)") } break; case REALSXP: if (targetIsI64 && isReal(source) && !sourceIsI64) { CHECK_RANGE(double, REAL, !ISNAN(val) && (!R_FINITE(val) || (int)val!=val), "f", "truncated (precision lost)") } } } */ #undef BODY #define BODY(STYPE, RFUN, CTYPE, CAST, ASSIGN) {{ \ const STYPE *sd = (const STYPE *)RFUN(source); \ if (length(where)) { \ if (slen==1) { \ const STYPE val = sd[0]; \ const CTYPE cval = CAST; \ for (int wi=0; wi255 || val<0) ? 0 : val, td[i]=cval) case REALSXP: if (sourceIsI64) BODY(int64_t, REAL, Rbyte, (val>255 || val<0) ? 0 : val, td[i]=cval) else BODY(double, REAL, Rbyte, (ISNAN(val)||val>255||val<0) ? 0 : val, td[i]=cval) default: COERCE_ERROR("raw"); } } break; case LGLSXP: { int *td = LOGICAL(target) + off; switch (TYPEOF(source)) { case RAWSXP: BODY(Rbyte, RAW, int, val!=0, td[i]=cval) case LGLSXP: if (mc) { memcpy(td, LOGICAL(source), slen*sizeof(Rboolean)); break; } else BODY(int, LOGICAL, int, val, td[i]=cval) case INTSXP: BODY(int, INTEGER, int, val==NA_INTEGER ? NA_LOGICAL : val!=0, td[i]=cval) case REALSXP: if (sourceIsI64) BODY(int64_t, REAL, int, val==NA_INTEGER64 ? NA_LOGICAL : val!=0, td[i]=cval) else BODY(double, REAL, int, ISNAN(val) ? NA_LOGICAL : val!=0.0, td[i]=cval) default: COERCE_ERROR("logical"); } } break; case INTSXP : { int *td = INTEGER(target) + off; switch (TYPEOF(source)) { case RAWSXP: BODY(Rbyte, RAW, int, (int)val, td[i]=cval) case LGLSXP: // same as INTSXP ... case INTSXP: if (mc) { memcpy(td, INTEGER(source), slen*sizeof(int)); break; } else BODY(int, INTEGER, int, val, td[i]=cval) case REALSXP: if (sourceIsI64) BODY(int64_t, REAL, int, (val==NA_INTEGER64||val>INT_MAX||val<=NA_INTEGER) ? NA_INTEGER : (int)val, td[i]=cval) else BODY(double, REAL, int, ISNAN(val) ? NA_INTEGER : (int)val, td[i]=cval) default: COERCE_ERROR("integer"); // test 2005.4 } } break; case REALSXP : { if (targetIsI64) { int64_t *td = (int64_t *)REAL(target) + off; switch (TYPEOF(source)) { case RAWSXP: BODY(Rbyte, RAW, int64_t, (int64_t)val, td[i]=cval) case LGLSXP: // same as INTSXP case INTSXP: BODY(int, INTEGER, int64_t, val==NA_INTEGER ? NA_INTEGER64 : val, td[i]=cval) case REALSXP: if (sourceIsI64) { if (mc) { memcpy(td, (int64_t *)REAL(source), slen*sizeof(int64_t)); break; } else BODY(int64_t, REAL, int64_t, val, td[i]=cval) } else BODY(double, REAL, int64_t, R_FINITE(val) ? val : NA_INTEGER64, td[i]=cval) default: COERCE_ERROR("integer64"); } } else { double *td = REAL(target) + off; switch (TYPEOF(source)) { case RAWSXP: BODY(Rbyte, RAW, double, (double)val, td[i]=cval) case LGLSXP: // same as INTSXP case INTSXP: BODY(int, INTEGER, double, val==NA_INTEGER ? NA_REAL : val, td[i]=cval) case REALSXP: if (!sourceIsI64) { if (mc) { memcpy(td, (double *)REAL(source), slen*sizeof(double)); break; } else BODY(double, REAL, double, val, td[i]=cval) } else BODY(int64_t, REAL, double, val==NA_INTEGER64 ? NA_REAL : val, td[i]=cval) default: COERCE_ERROR("double"); } } } break; case CPLXSXP: { Rcomplex *td = COMPLEX(target) + off; double im = 0.0; switch (TYPEOF(source)) { case RAWSXP: BODY(Rbyte, RAW, double, (im=0.0,val), td[i].r=cval;td[i].i=im) case LGLSXP: // same as INTSXP case INTSXP: BODY(int, INTEGER, double, val==NA_INTEGER?(im=NA_REAL,NA_REAL):(im=0.0,val), td[i].r=cval;td[i].i=im) case REALSXP: if (sourceIsI64) BODY(int64_t, REAL, double, val==NA_INTEGER64?(im=NA_REAL,NA_REAL):(im=0.0,val), td[i].r=cval;td[i].i=im) else BODY(double, REAL, double, ISNAN(val)?(im=NA_REAL,NA_REAL):(im=0.0,val), td[i].r=cval;td[i].i=im) case CPLXSXP: if (mc) { memcpy(td, COMPLEX(source), slen*sizeof(Rcomplex)); break; } else BODY(Rcomplex, COMPLEX, Rcomplex, val, td[i]=cval) default: COERCE_ERROR("complex"); } } break; case STRSXP : if (sourceIsFactor) { const SEXP *ld = STRING_PTR(PROTECT(getAttrib(source, R_LevelsSymbol))); protecti++; BODY(int, INTEGER, SEXP, val==NA_INTEGER ? NA_STRING : ld[val-1], SET_STRING_ELT(target, off+i, cval)) } else { if (!isString(source)) { if (allNA(source, true)) { // saves common coercion of NA (logical) to NA_character_ // ^^ =errorForBadType; if type list, that was already an error earlier so we // want to be strict now otherwise list would get to coerceVector below if (length(where)) { for (int i=0; i0) SET_STRING_ELT(target, wd[i]-1, NA_STRING); } else { for (int i=0; i(INT_MAX/2) ? INT_MAX : nalloc*2; char *tmp = (char *)realloc(saveds, nalloc*sizeof(SEXP)); if (tmp==NULL) { // C spec states that if realloc() fails the original block is left untouched; it is not freed or moved. We rely on that here. savetl_end(); // # nocov free(saveds) happens inside savetl_end error("Failed to realloc saveds to %d items in savetl", nalloc); // # nocov } saveds = (SEXP *)tmp; tmp = (char *)realloc(savedtl, nalloc*sizeof(R_len_t)); if (tmp==NULL) { savetl_end(); // # nocov error("Failed to realloc savedtl to %d items in savetl", nalloc); // # nocov } savedtl = (R_len_t *)tmp; } saveds[nsaved] = s; savedtl[nsaved] = TRUELENGTH(s); nsaved++; } void savetl_end() { // Can get called if nothing has been saved yet (nsaved==0), or even if _init() hasn't been called yet (pointers NULL). Such // as to clear up before error. Also, it might be that nothing needed to be saved anyway. for (int i=0; i0 checked above eachMax[i] = 0; SEXP li = VECTOR_ELT(l, i); if (isNull(li)) continue; if (TYPEOF(li) != VECSXP) error("Item %d of input is not a data.frame, data.table or list", i+1); const int thisncol = length(li); if (!thisncol) continue; // delete as now more flexible ... if (fill && isNull(getAttrib(li, R_NamesSymbol))) error("When fill=TRUE every item of the input must have column names. Item %d does not.", i+1); if (fill) { if (thisncol>ncol) ncol=thisncol; // this section initializes ncol with max ncol. ncol may be increased when usenames is accounted for further down } else { if (ncol==0) { ncol=thisncol; first=i; } else if (thisncol!=ncol) error("Item %d has %d columns, inconsistent with item %d which has %d columns. To fill missing columns use fill=TRUE.", i+1, thisncol, first+1, ncol); } int nNames = length(getAttrib(li, R_NamesSymbol)); if (nNames>0 && nNames!=thisncol) error("Item %d has %d columns but %d column names. Invalid object.", i+1, thisncol, nNames); if (nNames>0) anyNames=true; upperBoundUniqueNames += nNames; int maxLen=0, whichMax=0; for (int j=0; jmaxLen) { maxLen=tt; whichMax=j; } } for (int j=0; j1 && tt!=maxLen) error("Column %d of item %d is length %d inconsistent with column %d which is length %d. Only length-1 columns are recycled.", j+1, i+1, tt, whichMax+1, maxLen); if (tt==0 && maxLen>0 && numZero++==0) { firstZeroCol = j; firstZeroItem=i; } } eachMax[i] = maxLen; nrow += maxLen; } if (numZero) { // #1871 SEXP names = getAttrib(VECTOR_ELT(l, firstZeroItem), R_NamesSymbol); const char *ch = names==R_NilValue ? "" : CHAR(STRING_ELT(names, firstZeroCol)); warning("Column %d ['%s'] of item %d is length 0. This (and %d other%s like it) has been filled with NA (NULL for list columns) to make each item uniform.", firstZeroCol+1, ch, firstZeroItem+1, numZero-1, numZero==2?"":"s"); } if (nrow==0 && ncol==0) return(R_NilValue); if (nrow>INT32_MAX) error("Total rows in the list is %lld which is larger than the maximum number of rows, currently %d", nrow, INT32_MAX); if (usenames==TRUE && !anyNames) error("use.names=TRUE but no item of input list has any names"); int *colMap=NULL; // maps each column in final result to the column of each list item if (usenames==TRUE || usenames==NA_LOGICAL) { // here we proceed as if fill=true for brevity (accounting for dups is tricky) and then catch any missings after this branch // when use.names==NA we also proceed here as if use.names was TRUE to save new code and then check afterwards the map is 1:ncol for every item // first find number of unique column names present; i.e. length(unique(unlist(lapply(l,names)))) SEXP *uniq = (SEXP *)malloc(upperBoundUniqueNames * sizeof(SEXP)); // upperBoundUniqueNames was initialized with 1 to ensure this is defined (otherwise 0 when no item has names) if (!uniq) error("Failed to allocate upper bound of %lld unique column names [sum(lapply(l,ncol))]", upperBoundUniqueNames); savetl_init(); int nuniq=0; for (int i=0; i0) savetl(s); uniq[nuniq++] = s; SET_TRUELENGTH(s,-nuniq); } } if (nuniq>0) { SEXP *tt = realloc(uniq, nuniq*sizeof(SEXP)); // shrink to only what we need to release the spare if (!tt) free(uniq); // shrink never fails; just keep codacy happy uniq = tt; } // now count the dups (if any) and how they're distributed across the items int *counts = (int *)calloc(nuniq, sizeof(int)); // counts of names for each colnames int *maxdup = (int *)calloc(nuniq, sizeof(int)); // the most number of dups for any name within one colname vector if (!counts || !maxdup) { // # nocov start for (int i=0; i maxdup[u]) maxdup[u] = counts[u]; } } int ttncol = 0; for (int u=0; uncol) ncol=ttncol; free(maxdup); maxdup=NULL; // not needed again // ncol is now the final number of columns accounting for unique and dups across all colnames // allocate a matrix: nrows==length(list) each entry contains which column to fetch for that final column int *colMapRaw = (int *)malloc(LENGTH(l)*ncol * sizeof(int)); // the result of this scope used later int *uniqMap = (int *)malloc(ncol * sizeof(int)); // maps the ith unique string to the first time it occurs in the final result int *dupLink = (int *)malloc(ncol * sizeof(int)); // if a colname has occurred before (a dup) links from the 1st to the 2nd time in the final result, 2nd to 3rd, etc if (!colMapRaw || !uniqMap || !dupLink) { // # nocov start for (int i=0; i0) { w=dupLink[w]; --wi; } // hop through the dups if (wi && dupLink[w]==-1) { // first time we've seen this number of dups of this name w = dupLink[w] = lastDup--; uniqMap[w] = nextCol++; } } colMapRaw[i*ncol + uniqMap[w]] = j; } } } for (int i=0; iTYPEORDER(maxType)) maxType=thisType; // return ScalarInteger(maxType); if (isFactor(thisCol)) { if (isNull(getAttrib(thisCol,R_LevelsSymbol))) error("Column %d of item %d has type 'factor' but has no levels; i.e. malformed.", w+1, i+1); factor = true; if (isOrdered(thisCol)) { orderedFactor = true; int thisLen = length(getAttrib(thisCol, R_LevelsSymbol)); if (thisLen>longestLen) { longestLen=thisLen; longestLevels=getAttrib(thisCol, R_LevelsSymbol); /*for warnings later ...*/longestW=w; longestI=i; } } } else if (!isString(thisCol)) anyNotStringOrFactor=true; // even for length 0 columns for consistency; test 2113.3 if (INHERITS(thisCol, char_integer64)) { // PRINTNAME(install("integer64")) if (firsti>=0 && !length(getAttrib(firstCol, R_ClassSymbol))) { firsti=i; firstw=w; firstCol=thisCol; } // so the integer64 attribute gets copied to target below int64=true; } if (firsti==-1) { firsti=i; firstw=w; firstCol=thisCol; } else { if (!factor && !int64) { if (!R_compute_identical(PROTECT(getAttrib(thisCol, R_ClassSymbol)), PROTECT(getAttrib(firstCol, R_ClassSymbol)), 0)) { error("Class attribute on column %d of item %d does not match with column %d of item %d.", w+1, i+1, firstw+1, firsti+1); } UNPROTECT(2); } } } if (!foundName) { static char buff[12]; sprintf(buff,"V%d",j+1), SET_STRING_ELT(ansNames, idcol+j, mkChar(buff)); foundName=buff; } if (factor) maxType=INTSXP; // if any items are factors then a factor is created (could be an option) if (int64 && maxType!=REALSXP) error("Internal error: column %d of result is determined to be integer64 but maxType=='%s' != REALSXP", j+1, type2char(maxType)); // # nocov SEXP target; SET_VECTOR_ELT(ans, idcol+j, target=allocVector(maxType, nrow)); // does not initialize logical & numerics, but does initialize character and list if (!factor) copyMostAttrib(firstCol, target); // all but names,dim and dimnames; mainly for class. And if so, we want a copy here, not keepattr's SET_ATTRIB. if (factor && anyNotStringOrFactor) { // in future warn, or use list column instead ... warning("Column %d contains a factor but not all items for the column are character or factor", idcol+j+1); // some coercing from (likely) integer/numeric to character will be needed. But this coerce can feasibly fail with out-of-memory, so we have to do it up-front // before the savetl_init() because we have no hook to clean up tl if coerceVector fails. if (coercedForFactor==NULL) { coercedForFactor=PROTECT(allocVector(VECSXP, LENGTH(l))); nprotect++; } for (int i=0; i z regular factor because it contains an ambiguity: is a a a regular factor because this case isn't yet implemented. a0) savetl(s); levelsRaw[k] = s; SET_TRUELENGTH(s,-k-1); } for (int i=0; i=last) { // if tl>=0 then also tl>=last because last<=0 if (tl>=0) { sprintf(warnStr, // not direct warning as we're inside tl region "Column %d of item %d is an ordered factor but level %d ['%s'] is missing from the ordered levels from column %d of item %d. " \ "Each set of ordered factor levels should be an ordered subset of the first longest. A regular factor will be created for this column.", w+1, i+1, k+1, CHAR(s), longestW+1, longestI+1); } else { sprintf(warnStr, "Column %d of item %d is an ordered factor with '%s'<'%s' in its levels. But '%s'<'%s' in the ordered levels from column %d of item %d. " \ "A regular factor will be created for this column due to this ambiguity.", w+1, i+1, CHAR(levelsD[k-1]), CHAR(s), CHAR(s), CHAR(levelsD[k-1]), longestW+1, longestI+1); // k>=1 (so k-1 is ok) because when k==0 last==0 and this branch wouldn't happen } orderedFactor=false; i=LENGTH(l); // break outer i loop break; // break inner k loop // we leave the tl set for the longest levels; the regular factor will be created with the longest ordered levels first in case that useful for user } last = tl; // negative ordinal; last should monotonically grow more negative if the levels are an ordered subset of the longest } } } } for (int i=0; i0) savetl(s); if (allocLevel==nLevel) { // including initial time when allocLevel==nLevel==0 SEXP *tt = NULL; if (allocLevel(int64_t)INT_MAX) ? INT_MAX : (int)new; tt = (SEXP *)realloc(levelsRaw, allocLevel*sizeof(SEXP)); // first time levelsRaw==NULL and realloc==malloc in that case } if (tt==NULL) { // # nocov start // C spec states that if realloc() fails (above) the original block (levelsRaw) is left untouched: it is not freed or moved. We ... for (int k=0; k #include using namespace Rcpp ; // General to do: Check if you can do it without unsigned int and n[l+1] but just with int and n[l] // also:: perhaps redo everything with data pointers and 2d group indices (instead of filling the 2d structure every time): http://www.cplusplus.com/reference/vector/vector/data/ // https://stackoverflow.com/questions/1733143/converting-between-c-stdvector-and-c-array-without-copying?rq=1 // For named vectors, could add right name! template Vector fmodeImpl(const Vector& x, int ng, const IntegerVector& g, const SEXP& gs, const SEXP& w, bool narm, int ret) { int l = x.size(); if(l < 1) return x; // Prevents seqfault for numeric(0) #101 bool minm = ret == 1, nfirstm = ret > 0, lastm = ret == 3; typedef typename Rcpp::traits::storage_type::type storage_t; auto isnanT = (RTYPE == REALSXP) ? [](storage_t x) { return x != x; } : [](storage_t x) { return x == Vector::get_na(); }; unsigned int addr; if(Rf_isNull(w)) { // No Weights if(ng == 0) { sugar::IndexHash hash(x); int max = 1, index = 0; // n[l+1] is unstable std::vector n(l+1); // = no_init_vector // better for valgrind storage_t mode = hash.src[0]; if(narm) { int i = 0, end = l-1; while(isnanT(mode) && i!=end) mode = hash.src[++i]; if(i!=end) { for( ; i != l; ++i) { storage_t val = hash.src[i]; if(isnanT(val)) continue; addr = hash.get_addr(val); index = hash.data[addr]; while(index && hash.not_equal(hash.src[index - 1], val)) { ++addr; if(addr == static_cast(hash.m)) addr = 0; index = hash.data[addr]; } if(!index) { hash.data[addr] = i+1; ++hash.size_; n[i+1] = 1; if(nfirstm && max == 1) { // Could also do this at the end in a separate loop. What is faster ? -> This seems better ! if(lastm) mode = val; else if(minm) { if(mode > val) mode = val; } else { if(mode < val) mode = val; } } } else { // if(++n[index] > max) { // good, or create int index // max = n[index]; // mode = val; // } if(++n[index] >= max) { if(lastm || n[index] > max) { max = n[index]; mode = val; } else if(nfirstm) { if(minm) { if(mode > val) mode = val; } else { if(mode < val) mode = val; } } } } } // if(nfirstm && max == 1) { // Above seems better ! // if(minm) { // for(int i = 1; i != l; ++i) if(mode > x[i]) mode = x[i]; // } else { // for(int i = 1; i != l; ++i) if(mode < x[i]) mode = x[i]; // } // } } } else { for(int i = 0; i != l; ++i) { storage_t val = hash.src[i]; addr = hash.get_addr(val); index = hash.data[addr]; while(index && hash.not_equal(hash.src[index - 1], val)) { ++addr; if(addr == static_cast(hash.m)) addr = 0; index = hash.data[addr]; } if(!index) { hash.data[addr] = i+1; ++hash.size_; n[i+1] = 1; if(nfirstm && max == 1) { // Could also do this at the end in a separate loop. What is faster ? -> This seems better ! if(lastm) mode = val; else if(minm) { if(mode > val) mode = val; } else { if(mode < val) mode = val; } } } else { // if(++n[index] > max) { // good, or create int index // max = n[index]; // mode = val; // } if(++n[index] >= max) { // good, or create int index if(lastm || n[index] > max) { max = n[index]; mode = val; } else if(nfirstm) { if(minm) { if(mode > val) mode = val; } else { if(mode < val) mode = val; } } } } } // if(nfirstm && max == 1) { // above seems better // if(minm) { // for(int i = 1; i != l; ++i) if(mode > x[i]) mode = x[i]; // } else { // for(int i = 1; i != l; ++i) if(mode < x[i]) mode = x[i]; // } // } } Vector out(1, mode); SHALLOW_DUPLICATE_ATTRIB(out, x); // could add right name for named vectors if(Rf_getAttrib(x, R_NamesSymbol) != R_NilValue) Rf_setAttrib(out, R_NamesSymbol, R_NilValue); return out; } else { if(l != g.size()) stop("length(g) must match length(x)"); const int *pg = g.begin(); int ngp = ng+1; std::vector > gmap(ngp); Vector out = no_init_vector(ng); std::vector n(ngp); // memset(n, 0, sizeof(int)*ngp); if(Rf_isNull(gs)) { for(int i = 0; i != l; ++i) ++n[pg[i]]; for(int i = 1; i != ngp; ++i) { if(n[i] == 0) stop("Group size of 0 encountered. This is probably due to unused factor levels. Use fdroplevels(f) to drop them."); gmap[i] = std::vector (n[i]); // Vector n[i] = 0; } } else { IntegerVector gsv = gs; if(ng != gsv.size()) stop("ng must match length(gs)"); for(int i = 0; i != ng; ++i) { if(gsv[i] == 0) stop("Group size of 0 encountered. This is probably due to unused factor levels. Use fdroplevels(f) to drop them."); gmap[i+1] = std::vector (gsv[i]); // Vector } } for(int i = 0; i != l; ++i) gmap[pg[i]][n[pg[i]]++] = x[i]; if(narm) { for(int gr = 0; gr != ng; ++gr) { // const std::vector& temp = gmap[gr]; // wrap() // good ? // const Vector& // better for character strings sugar::IndexHash hash(wrap(gmap[gr+1])); // wrap(temp) int i = 0, s = hash.n, end = s-1, max = 1, index; // n[s+1] // fastest ? use n ? while(isnanT(hash.src[i]) && i!=end) ++i; out[gr] = hash.src[i]; // good if(i!=end) { std::vector n(s+1); // = no_init_vector // better for valgrind for( ; i != s; ++i) { storage_t val = hash.src[i]; if(isnanT(val)) continue; addr = hash.get_addr(val); index = hash.data[addr]; while(index && hash.not_equal(hash.src[index - 1], val)) { ++addr; if(addr == static_cast(hash.m)) addr = 0; index = hash.data[addr]; } if(!index) { hash.data[addr] = i+1; ++hash.size_; n[i+1] = 1; if(nfirstm && max == 1) { // Could also do this at the end in a separate loop. What is faster ? -> This seems better ! if(lastm) out[gr] = val; else if(minm) { if(out[gr] > val) out[gr] = val; } else { if(out[gr] < val) out[gr] = val; } } } else { // if(++n[hash.data[addr]] > max) { // good, or create int index // max = n[hash.data[addr]]; // out[gr] = val; // } // index = hash.data[addr]; if(++n[index] >= max) { if(lastm || n[index] > max) { max = n[index]; out[gr] = val; } else if(nfirstm) { if(minm) { if(out[gr] > val) out[gr] = val; } else { if(out[gr] < val) out[gr] = val; } } } } } // if(nfirstm && max == 1) { // Above seems better ! // if(minm) { // for(int i = 1; i != s; ++i) if(out[gr] > hash.src[i]) out[gr] = hash.src[i]; // } else { // for(int i = 1; i != s; ++i) if(out[gr] < hash.src[i]) out[gr] = hash.src[i]; // } // } } } } else { for(int gr = 0; gr != ng; ++gr) { // const std::vector& temp = gmap[gr]; // good ? // const Vector& // wrap() sugar::IndexHash hash(wrap(gmap[gr+1])); // wrap(temp) out[gr] = hash.src[0]; int s = hash.n, max = 1, index; // n[s+1] // fastest ? use n ? and reset partially ? std::vector n(s+1); // = no_init_vector // better for valgrind for(int i = 0; i != s; ++i) { storage_t val = hash.src[i]; addr = hash.get_addr(val); index = hash.data[addr]; while(index && hash.not_equal(hash.src[index - 1], val)) { ++addr; if(addr == static_cast(hash.m)) addr = 0; index = hash.data[addr]; } if(!index) { hash.data[addr] = i+1; ++hash.size_; n[i+1] = 1; if(nfirstm && max == 1) { // Could also do this at the end in a separate loop. What is faster ? -> This seems better ! if(lastm) out[gr] = val; else if(minm) { if(out[gr] > val) out[gr] = val; } else { if(out[gr] < val) out[gr] = val; } } } else { // if(++n[hash.data[addr]] > max) { // good, or create int index // max = n[hash.data[addr]]; // out[gr] = val; // } if(++n[index] >= max) { if(lastm || n[index] > max) { max = n[index]; out[gr] = val; } else if(nfirstm) { if(minm) { if(out[gr] > val) out[gr] = val; } else { if(out[gr] < val) out[gr] = val; } } } } } // if(nfirstm && max == 1) { // Above seems better ! // if(minm) { // for(int i = 1; i != s; ++i) if(out[gr] > hash.src[i]) out[gr] = hash.src[i]; // } else { // for(int i = 1; i != s; ++i) if(out[gr] < hash.src[i]) out[gr] = hash.src[i]; // } // } } } SHALLOW_DUPLICATE_ATTRIB(out, x); if(Rf_getAttrib(x, R_NamesSymbol) != R_NilValue) Rf_setAttrib(out, R_NamesSymbol, R_NilValue); return out; } } else { // With Weights NumericVector wg = w; if(l != wg.size()) stop("length(w) must match length(x)"); double *pwg = wg.begin(); if(ng == 0) { sugar::IndexHash hash(x); double max = DBL_MIN; int index = 0; std::vector n(l+1); // = no_init_vector // better for valgrind storage_t mode = hash.src[0]; if(narm) { int i = 0, end = l-1; while((isnanT(mode) || std::isnan(pwg[i])) && i!=end) mode = hash.src[++i]; if(i!=end) for( ; i != l; ++i) { storage_t val = hash.src[i]; if(isnanT(val) || std::isnan(pwg[i])) continue; addr = hash.get_addr(val); index = hash.data[addr]; while(index && hash.not_equal(hash.src[index - 1], val)) { ++addr; if(addr == static_cast(hash.m)) addr = 0; index = hash.data[addr]; } if(!index) { hash.data[addr] = i+1; ++hash.size_; n[i+1] = pwg[i]; if(pwg[i] >= max) { // necessary, because second loop only entered for more than one occurrence of the same value if(lastm || pwg[i] > max) { max = pwg[i]; mode = val; } else if(nfirstm) { // Could also do this at the end in a separate loop. What is faster ?? if(minm) { if(mode > val) mode = val; } else { if(mode < val) mode = val; } } } } else { n[index] += pwg[i]; // if(n[index] > max) { // good, or create int index // max = n[index]; // mode = val; // } if(n[index] >= max) { if(lastm || n[index] > max) { max = n[index]; mode = val; } else if(nfirstm) { if(minm) { if(mode > val) mode = val; } else { if(mode < val) mode = val; } } } } } } else { for(int i = 0; i != l; ++i) { if(std::isnan(pwg[i])) continue; storage_t val = hash.src[i]; addr = hash.get_addr(val); index = hash.data[addr]; while(index && hash.not_equal(hash.src[index - 1], val)) { ++addr; if(addr == static_cast(hash.m)) addr = 0; index = hash.data[addr]; } if(!index) { hash.data[addr] = i+1; ++hash.size_; n[i+1] = pwg[i]; if(pwg[i] >= max) { // necessary, because second loop only entered for more than one occurrence of the same value if(lastm || pwg[i] > max) { max = pwg[i]; mode = val; } else if(nfirstm) { // Could also do this at the end in a separate loop. What is faster ?? if(minm) { if(mode > val) mode = val; } else { if(mode < val) mode = val; } } } } else { n[index] += pwg[i]; // if(n[index] > max) { // good, or create int index // max = n[index]; // mode = val; // } if(n[index] >= max) { // good, or create int index if(lastm || n[index] > max) { max = n[index]; mode = val; } else if(nfirstm) { if(minm) { if(mode > val) mode = val; } else { if(mode < val) mode = val; } } } } } } Vector out(1, mode); SHALLOW_DUPLICATE_ATTRIB(out, x); if(Rf_getAttrib(x, R_NamesSymbol) != R_NilValue) Rf_setAttrib(out, R_NamesSymbol, R_NilValue); return out; } else { if(l != g.size()) stop("length(g) must match length(x)"); const int *pg = g.begin(); int ngp = ng+1; std::vector > gmap(ngp); std::vector > wmap(ngp); Vector out = no_init_vector(ng); std::vector n(ngp); if(Rf_isNull(gs)) { for(int i = 0; i != l; ++i) ++n[pg[i]]; for(int i = 1; i != ngp; ++i) { if(n[i] == 0) stop("Group size of 0 encountered. This is probably due to unused factor levels. Use fdroplevels(f) to drop them."); gmap[i] = std::vector (n[i]); wmap[i] = std::vector (n[i]); n[i] = 0; } } else { IntegerVector gsv = gs; if(ng != gsv.size()) stop("ng must match length(gs)"); for(int i = 0; i != ng; ++i) { if(gsv[i] == 0) stop("Group size of 0 encountered. This is probably due to unused factor levels. Use fdroplevels(f) to drop them."); gmap[i+1] = std::vector (gsv[i]); wmap[i+1] = std::vector (gsv[i]); } } for(int i = 0; i != l; ++i) { int gi = pg[i]; gmap[gi][n[gi]] = x[i]; wmap[gi][n[gi]++] = pwg[i]; } if(narm) { for(int gr = 0; gr != ng; ++gr) { // const std::vector& temp = gmap[gr]; // good ? // const Vector& // wrap() const std::vector& wtemp = wmap[gr+1]; sugar::IndexHash hash(wrap(gmap[gr+1])); // wrap(temp) int i = 0, s = hash.n, end = s-1, index; double max = DBL_MIN; // n[s+1] while((isnanT(hash.src[i]) || std::isnan(wtemp[i])) && i!=end) ++i; out[gr] = hash.src[i]; // good ! if(i!=end) { std::vector n(s+1); // = no_init_vector // better for valgrind for( ; i != s; ++i) { storage_t val = hash.src[i]; if(isnanT(val) || std::isnan(wtemp[i])) continue; addr = hash.get_addr(val); index = hash.data[addr]; while(index && hash.not_equal(hash.src[index - 1], val)) { ++addr; if(addr == static_cast(hash.m)) addr = 0; index = hash.data[addr]; } if(!index) { hash.data[addr] = i+1; ++hash.size_; n[i+1] = wtemp[i]; if(wtemp[i] >= max) { // necessary, because second loop only entered for more than one occurrence of the same value if(lastm || wtemp[i] > max) { max = wtemp[i]; out[gr] = val; } else if(nfirstm) { // Could also do this at the end in a separate loop. What is faster ?? if(minm) { if(out[gr] > val) out[gr] = val; } else { if(out[gr] < val) out[gr] = val; } } } } else { n[index] += wtemp[i]; // if(n[index] > max) { // max = n[index]; // out[gr] = val; // } if(n[index] >= max) { if(lastm || n[index] > max) { max = n[index]; out[gr] = val; } else if(nfirstm) { if(minm) { if(out[gr] > val) out[gr] = val; } else { if(out[gr] < val) out[gr] = val; } } } } } } } } else { for(int gr = 0; gr != ng; ++gr) { // const std::vector& temp = gmap[gr]; // good ? // const Vector& // wrap() const std::vector& wtemp = wmap[gr+1]; sugar::IndexHash hash(wrap(gmap[gr+1])); // wrap(temp) out[gr] = hash.src[0]; int s = hash.n, index; // fastest ? use n ? and reset partially ? double max = DBL_MIN; // n[s+1]; std::vector n(s+1); // = no_init_vector // better for valgrind for(int i = 0; i != s; ++i) { if(std::isnan(wtemp[i])) continue; storage_t val = hash.src[i]; addr = hash.get_addr(val); index = hash.data[addr]; while(index && hash.not_equal(hash.src[index - 1], val)) { ++addr; if(addr == static_cast(hash.m)) addr = 0; index = hash.data[addr]; } if(!index) { hash.data[addr] = i+1; ++hash.size_; n[i+1] = wtemp[i]; if(wtemp[i] >= max) { // necessary, because second loop only entered for more than one occurrence of the same value if(lastm || wtemp[i] > max) { max = wtemp[i]; out[gr] = val; } else if(nfirstm) { // Could also do this at the end in a separate loop. What is faster ?? if(minm) { if(out[gr] > val) out[gr] = val; } else { if(out[gr] < val) out[gr] = val; } } } } else { n[index] += wtemp[i]; // if(n[index] > max) { // max = n[index]; // out[gr] = val; // } if(n[index] >= max) { if(lastm || n[index] > max) { max = n[index]; out[gr] = val; } else if(nfirstm) { if(minm) { if(out[gr] > val) out[gr] = val; } else { if(out[gr] < val) out[gr] = val; } } } } } } } SHALLOW_DUPLICATE_ATTRIB(out, x); if(Rf_getAttrib(x, R_NamesSymbol) != R_NilValue) Rf_setAttrib(out, R_NamesSymbol, R_NilValue); return out; } } } IntegerVector fmodeFACT(const IntegerVector& x, int ng, const IntegerVector& g, const SEXP& gs, const SEXP& w, bool narm, int ret) { int l = x.size(), nlevp = Rf_nlevels(x)+1, val = 0; if(l < 1) return x; // Prevents seqfault for numeric(0) #101 bool minm = ret == 1, nfirstm = ret > 0, lastm = ret == 3; if(Rf_isNull(w)) { // No Weights if(ng == 0) { // No Groups std::vector n(nlevp); int max = 1, mode = x[0]; if(narm) { int i = 0, end = l-1; while(mode == NA_INTEGER && i!=end) mode = x[++i]; if(i!=end) { for( ; i != l; ++i) { val = x[i]; if(val == NA_INTEGER) continue; if(++n[val] >= max) { if(lastm || n[val] > max) { max = n[val]; mode = val; } else if(nfirstm) { if(minm) { if(mode > val) mode = val; } else { if(mode < val) mode = val; } } } } } } else { for(int i = 0; i != l; ++i) { val = x[i]; if(val == NA_INTEGER) val = 0; if(++n[val] >= max) { if(lastm || n[val] > max) { max = n[val]; mode = val; } else if(nfirstm) { if(minm) { if(mode > val) mode = val; } else { if(mode < val) mode = val; } } } } if(mode == 0) mode = NA_INTEGER; } IntegerVector out(1, mode); SHALLOW_DUPLICATE_ATTRIB(out, x); // could add right name for names vectors if(Rf_getAttrib(x, R_NamesSymbol) != R_NilValue) Rf_setAttrib(out, R_NamesSymbol, R_NilValue); return out; } else { if(l != g.size()) stop("length(g) must match length(x)"); const int *pg = g.begin(); int ngp = ng+1; std::vector > gmap(ngp); IntegerVector out = no_init_vector(ng); std::vector n(ngp); if(Rf_isNull(gs)) { for(int i = 0; i != l; ++i) ++n[pg[i]]; for(int i = 1; i != ngp; ++i) { if(n[i] == 0) stop("Group size of 0 encountered. This is probably due to unused factor levels. Use fdroplevels(f) to drop them."); gmap[i] = std::vector (n[i]); n[i] = 0; } } else { IntegerVector gsv = gs; if(ng != gsv.size()) stop("ng must match length(gs)"); for(int i = 0; i != ng; ++i) { if(gsv[i] == 0) stop("Group size of 0 encountered. This is probably due to unused factor levels. Use fdroplevels(f) to drop them."); gmap[i+1] = std::vector (gsv[i]); } } for(int i = 0; i != l; ++i) gmap[pg[i]][n[pg[i]]++] = x[i]; if(narm) { for(int gr = 0; gr != ng; ++gr) { const std::vector& temp = gmap[gr+1]; int i = 0, s = temp.size(), end = s-1, max = 1; while(temp[i] == NA_INTEGER && i!=end) ++i; out[gr] = temp[i]; if(i!=end) { std::vector n(nlevp); for( ; i != s; ++i) { val = temp[i]; if(val == NA_INTEGER) continue; if(++n[val] >= max) { if(lastm || n[val] > max) { max = n[val]; out[gr] = val; } else if(nfirstm) { if(minm) { if(out[gr] > val) out[gr] = val; } else { if(out[gr] < val) out[gr] = val; } } } } } } } else { for(int gr = 0; gr != ng; ++gr) { const std::vector& temp = gmap[gr+1]; int tl = temp.size(), max = 1; std::vector n(nlevp); out[gr] = temp[0]; for(int i = 0; i != tl; ++i) { val = temp[i]; if(val == NA_INTEGER) val = 0; if(++n[val] >= max) { if(lastm || n[val] > max) { max = n[val]; out[gr] = val; } else if(nfirstm) { if(minm) { if(out[gr] > val) out[gr] = val; } else { if(out[gr] < val) out[gr] = val; } } } } if(out[gr] == 0) out[gr] = NA_INTEGER; } } SHALLOW_DUPLICATE_ATTRIB(out, x); if(Rf_getAttrib(x, R_NamesSymbol) != R_NilValue) Rf_setAttrib(out, R_NamesSymbol, R_NilValue); return out; } } else { // With Weights NumericVector wg = w; if(l != wg.size()) stop("length(w) must match length(x)"); double *pwg = wg.begin(); if(ng == 0) { double max = DBL_MIN; int mode = x[0]; std::vector n(nlevp); if(narm) { int i = 0, end = l-1; while((mode == NA_INTEGER || std::isnan(pwg[i])) && i!=end) mode = x[++i]; if(i!=end) for( ; i != l; ++i) { val = x[i]; if(val == NA_INTEGER || std::isnan(pwg[i])) continue; n[val] += pwg[i]; if(n[val] >= max) { if(lastm || n[val] > max) { max = n[val]; mode = val; } else if(nfirstm) { if(minm) { if(mode > val) mode = val; } else { if(mode < val) mode = val; } } } } } else { for(int i = 0; i != l; ++i) { if(std::isnan(pwg[i])) continue; val = x[i]; if(val == NA_INTEGER) val = 0; n[val] += pwg[i]; if(n[val] >= max) { if(lastm || n[val] > max) { max = n[val]; mode = val; } else if(nfirstm) { if(minm) { if(mode > val) mode = val; } else { if(mode < val) mode = val; } } } } if(mode == 0) mode = NA_INTEGER; } IntegerVector out(1, mode); SHALLOW_DUPLICATE_ATTRIB(out, x); if(Rf_getAttrib(x, R_NamesSymbol) != R_NilValue) Rf_setAttrib(out, R_NamesSymbol, R_NilValue); return out; } else { if(l != g.size()) stop("length(g) must match length(x)"); const int *pg = g.begin(); int ngp = ng+1; std::vector > gmap(ngp); std::vector > wmap(ngp); IntegerVector out = no_init_vector(ng); std::vector n(ngp); if(Rf_isNull(gs)) { for(int i = 0; i != l; ++i) ++n[pg[i]]; for(int i = 1; i != ngp; ++i) { if(n[i] == 0) stop("Group size of 0 encountered. This is probably due to unused factor levels. Use fdroplevels(f) to drop them."); gmap[i] = std::vector (n[i]); wmap[i] = std::vector (n[i]); n[i] = 0; } } else { IntegerVector gsv = gs; if(ng != gsv.size()) stop("ng must match length(gs)"); for(int i = 0; i != ng; ++i) { if(gsv[i] == 0) stop("Group size of 0 encountered. This is probably due to unused factor levels. Use fdroplevels(f) to drop them."); gmap[i+1] = std::vector (gsv[i]); wmap[i+1] = std::vector (gsv[i]); } } for(int i = 0; i != l; ++i) { int gi = pg[i]; gmap[gi][n[gi]] = x[i]; wmap[gi][n[gi]++] = pwg[i]; } if(narm) { for(int gr = 0; gr != ng; ++gr) { const std::vector& temp = gmap[gr+1]; const std::vector& wtemp = wmap[gr+1]; int i = 0, s = temp.size(), end = s-1; double max = DBL_MIN; while((temp[i] == NA_INTEGER || std::isnan(wtemp[i])) && i!=end) ++i; out[gr] = temp[i]; if(i!=end) { std::vector n(nlevp); for( ; i != s; ++i) { val = temp[i]; if(val == NA_INTEGER || std::isnan(wtemp[i])) continue; n[val] += wtemp[i]; if(n[val] >= max) { if(lastm || n[val] > max) { max = n[val]; out[gr] = val; } else if(nfirstm) { if(minm) { if(out[gr] > val) out[gr] = val; } else { if(out[gr] < val) out[gr] = val; } } } } } } } else { for(int gr = 0; gr != ng; ++gr) { const std::vector& temp = gmap[gr+1]; const std::vector& wtemp = wmap[gr+1]; int tl = temp.size(); double max = DBL_MIN; std::vector n(nlevp); out[gr] = temp[0]; for(int i = 0; i != tl; ++i) { if(std::isnan(wtemp[i])) continue; val = temp[i]; if(val == NA_INTEGER) val = 0; n[val] += wtemp[i]; if(n[val] >= max) { if(lastm || n[val] > max) { max = n[val]; out[gr] = val; } else if(nfirstm) { if(minm) { if(out[gr] > val) out[gr] = val; } else { if(out[gr] < val) out[gr] = val; } } } } if(out[gr] == 0) out[gr] = NA_INTEGER; } } SHALLOW_DUPLICATE_ATTRIB(out, x); if(Rf_getAttrib(x, R_NamesSymbol) != R_NilValue) Rf_setAttrib(out, R_NamesSymbol, R_NilValue); return out; } } } template <> // No logical vector with sugar::IndexHash ! Vector fmodeImpl(const Vector& x, int ng, const IntegerVector& g, const SEXP& gs, const SEXP& w, bool narm, int ret) { int l = x.size(); if(l < 1) return x; // Prevents seqfault for numeric(0) #101 bool maxm = ret != 1; if(Rf_isNull(w)) { if(ng == 0) { int Ntrue = 0, Nfalse = 0; LogicalVector out(1); if(narm) { for(int i = 0; i != l; ++i) { if(x[i] == NA_LOGICAL) continue; if(x[i]) ++Ntrue; else ++Nfalse; } out[0] = (Ntrue == 0 && Nfalse == 0) ? NA_LOGICAL : (maxm || Ntrue != Nfalse) ? Ntrue >= Nfalse : false; } else { int NNA = 0; for(int i = 0; i != l; ++i) { if(x[i] == NA_LOGICAL) ++NNA; else if(x[i]) ++Ntrue; // else if is crucial here ! else ++Nfalse; } out[0] = (NNA > Ntrue && NNA > Nfalse) ? NA_LOGICAL : (maxm || Ntrue != Nfalse) ? Ntrue >= Nfalse : false; } SHALLOW_DUPLICATE_ATTRIB(out, x); if(Rf_getAttrib(x, R_NamesSymbol) != R_NilValue) Rf_setAttrib(out, R_NamesSymbol, R_NilValue); return out; } else { if(l != g.size()) stop("length(g) must match length(x)"); if(narm) { IntegerVector truefalse(ng); LogicalVector out(ng, NA_LOGICAL); for(int i = 0; i != l; ++i) { if(x[i] == NA_LOGICAL) continue; if(x[i]) { if(++truefalse[g[i]-1] >= 0) out[g[i]-1] = true; } else { if(--truefalse[g[i]-1] < 0) out[g[i]-1] = false; } } if(!maxm) for(int i = ng; i--; ) if(truefalse[i] == 0 && out[i] != NA_LOGICAL) out[i] = false; SHALLOW_DUPLICATE_ATTRIB(out, x); if(Rf_getAttrib(x, R_NamesSymbol) != R_NilValue) Rf_setAttrib(out, R_NamesSymbol, R_NilValue); return out; } else { IntegerVector Ntrue(ng), Nfalse(ng), NNA(ng); // better way ? LogicalVector out = no_init_vector(ng); for(int i = 0; i != l; ++i) { if(x[i] == NA_LOGICAL) ++NNA[g[i]-1]; else if(x[i]) ++Ntrue[g[i]-1]; else ++Nfalse[g[i]-1]; } if(maxm) { for(int i = ng; i--; ) { if(NNA[i] > Ntrue[i] && NNA[i] > Nfalse[i]) out[i] = NA_LOGICAL; else out[i] = Ntrue[i] >= Nfalse[i]; } } else { for(int i = ng; i--; ) { if(NNA[i] > Ntrue[i] && NNA[i] > Nfalse[i]) out[i] = NA_LOGICAL; else out[i] = Ntrue[i] > Nfalse[i]; } } SHALLOW_DUPLICATE_ATTRIB(out, x); if(Rf_getAttrib(x, R_NamesSymbol) != R_NilValue) Rf_setAttrib(out, R_NamesSymbol, R_NilValue); return out; } } } else { NumericVector wg = w; if(l != wg.size()) stop("length(w) must match length(x)"); if(ng == 0) { LogicalVector out(1); double sumwtrue = 0, sumwfalse = 0; if(narm) { for(int i = 0; i != l; ++i) { if(x[i] == NA_LOGICAL || std::isnan(wg[i])) continue; if(x[i]) sumwtrue += wg[i]; else sumwfalse += wg[i]; } out[0] = (sumwtrue == 0 && sumwfalse == 0) ? NA_LOGICAL : (maxm || sumwtrue != sumwfalse) ? sumwtrue >= sumwfalse : false; } else { double sumwNA = 0; for(int i = 0; i != l; ++i) { if(std::isnan(wg[i])) continue; if(x[i] == NA_LOGICAL) sumwNA += wg[i]; else if(x[i]) sumwtrue += wg[i]; else sumwfalse += wg[i]; } // important as w could be NA as well.. out[0] = ((sumwNA > sumwtrue && sumwNA > sumwfalse) || (sumwtrue == 0 && sumwfalse == 0)) ? NA_LOGICAL : (maxm || sumwtrue != sumwfalse) ? sumwtrue >= sumwfalse : false; } SHALLOW_DUPLICATE_ATTRIB(out, x); if(Rf_getAttrib(x, R_NamesSymbol) != R_NilValue) Rf_setAttrib(out, R_NamesSymbol, R_NilValue); return out; } else { if(l != g.size()) stop("length(g) must match length(x)"); if(narm) { NumericVector sumwtruefalse(ng); LogicalVector out(ng, NA_LOGICAL); for(int i = 0; i != l; ++i) { if(x[i] == NA_LOGICAL || std::isnan(wg[i])) continue; if(x[i]) { sumwtruefalse[g[i]-1] += wg[i]; if(sumwtruefalse[g[i]-1] >= 0) out[g[i]-1] = true; } else { sumwtruefalse[g[i]-1] -= wg[i]; if(sumwtruefalse[g[i]-1] < 0) out[g[i]-1] = false; } } if(!maxm) for(int i = ng; i--; ) if(sumwtruefalse[i] == 0 && out[i] != NA_LOGICAL) out[i] = false; SHALLOW_DUPLICATE_ATTRIB(out, x); if(Rf_getAttrib(x, R_NamesSymbol) != R_NilValue) Rf_setAttrib(out, R_NamesSymbol, R_NilValue); return out; } else { NumericVector sumwtrue(ng), sumwfalse(ng), sumwNA(ng); // better way ? LogicalVector out = no_init_vector(ng); for(int i = 0; i != l; ++i) { if(std::isnan(wg[i])) continue; if(x[i] == NA_LOGICAL) sumwNA[g[i]-1] += wg[i]; else if(x[i]) sumwtrue[g[i]-1] += wg[i]; else sumwfalse[g[i]-1] += wg[i]; } if(maxm) { for(int i = ng; i--; ) { // important as w could be NA as well.. if((sumwNA[i] > sumwtrue[i] && sumwNA[i] > sumwfalse[i]) || sumwtrue[i] + sumwfalse[i] == 0) out[i] = NA_LOGICAL; else out[i] = sumwtrue[i] >= sumwfalse[i]; } } else { for(int i = ng; i--; ) { // important as w could be NA as well.. if((sumwNA[i] > sumwtrue[i] && sumwNA[i] > sumwfalse[i]) || sumwtrue[i] + sumwfalse[i] == 0) out[i] = NA_LOGICAL; else out[i] = sumwtrue[i] > sumwfalse[i]; } } SHALLOW_DUPLICATE_ATTRIB(out, x); if(Rf_getAttrib(x, R_NamesSymbol) != R_NilValue) Rf_setAttrib(out, R_NamesSymbol, R_NilValue); return out; } } } } // [[Rcpp::export]] SEXP fmodeCpp(const SEXP& x, int ng = 0, const IntegerVector& g = 0, const SEXP& gs = R_NilValue, const SEXP& w = R_NilValue, bool narm = true, int ret = 0) { switch(TYPEOF(x)) { case REALSXP: return fmodeImpl(x, ng, g, gs, w, narm, ret); case INTSXP: if(Rf_isFactor(x) && (ng == 0 || Rf_nlevels(x) < Rf_length(x) / ng * 3)) return fmodeFACT(x, ng, g, gs, w, narm, ret); return fmodeImpl(x, ng, g, gs, w, narm, ret); case STRSXP: return fmodeImpl(x, ng, g, gs, w, narm, ret); case LGLSXP: return fmodeImpl(x, ng, g, gs, w, narm, ret); default: stop("Not supported SEXP type !"); } } // Replicating weight 2d array all the time is stupid // [[Rcpp::export]] // Better Solution ? // What about string ? -> do like matrix, but keep vector LGLSXP method SEXP fmodelCpp(const List& x, int ng = 0, const IntegerVector& g = 0, const SEXP& gs = R_NilValue, const SEXP& w = R_NilValue, bool narm = true, int ret = 0) { int l = x.size(); List out(l); for(int j = l; j--; ) { switch(TYPEOF(x[j])) { case REALSXP: out[j] = fmodeImpl(x[j], ng, g, gs, w, narm, ret); break; case INTSXP: if(Rf_isFactor(x[j]) && (ng == 0 || Rf_nlevels(x[j]) < Rf_length(x[j]) / ng * 3)) out[j] = fmodeFACT(x[j], ng, g, gs, w, narm, ret); else out[j] = fmodeImpl(x[j], ng, g, gs, w, narm, ret); break; case STRSXP: out[j] = fmodeImpl(x[j], ng, g, gs, w, narm, ret); break; case LGLSXP: out[j] = fmodeImpl(x[j], ng, g, gs, w, narm, ret); break; default: stop("Not supported SEXP type !"); } } SHALLOW_DUPLICATE_ATTRIB(out, x); if(ng == 0) Rf_setAttrib(out, R_RowNamesSymbol, Rf_ScalarInteger(1)); else Rf_setAttrib(out, R_RowNamesSymbol, IntegerVector::create(NA_INTEGER, -ng)); return out; } template SEXP fmodemImpl(const Matrix& x, int ng, const IntegerVector& g, const SEXP& gs, const SEXP& w, bool narm, bool drop, int ret) { int col = x.ncol(); Matrix out = (ng == 0) ? no_init_matrix(1, col) : no_init_matrix(ng, col); for(int j = col; j--; ) out(_, j) = fmodeImpl(x(_, j), ng, g, gs, w, narm, ret); if(drop && ng == 0) { Rf_setAttrib(out, R_DimSymbol, R_NilValue); // Rf_dimgets(out, R_NilValue); -> Doesn't work ! Rf_setAttrib(out, R_NamesSymbol, colnames(x)); } else { colnames(out) = colnames(x); if(!Rf_isObject(x)) Rf_copyMostAttrib(x, out); } return out; } template <> SEXP fmodemImpl(const Matrix& x, int ng, const IntegerVector& g, const SEXP& gs, const SEXP& w, bool narm, bool drop, int ret) { stop("Not supported SEXP type!"); } template <> SEXP fmodemImpl(const Matrix& x, int ng, const IntegerVector& g, const SEXP& gs, const SEXP& w, bool narm, bool drop, int ret) { stop("Not supported SEXP type!"); } template <> SEXP fmodemImpl(const Matrix& x, int ng, const IntegerVector& g, const SEXP& gs, const SEXP& w, bool narm, bool drop, int ret) { stop("Not supported SEXP type!"); } template <> SEXP fmodemImpl(const Matrix& x, int ng, const IntegerVector& g, const SEXP& gs, const SEXP& w, bool narm, bool drop, int ret) { stop("Not supported SEXP type!"); } // [[Rcpp::export]] SEXP fmodemCpp(SEXP x, int ng = 0, IntegerVector g = 0, SEXP gs = R_NilValue, SEXP w = R_NilValue, bool narm = true, bool drop = true, int ret = 0) { RCPP_RETURN_MATRIX(fmodemImpl, x, ng, g, gs, w, narm, drop, ret); } collapse/src/fscale.cpp0000644000176200001440000010222514174223734014615 0ustar liggesusers#include using namespace Rcpp; // Notes: // for mean there are 2 options: "overall.mean" = R_NegInf adds the overall mean. default is centering on 0, or centering on a mean provided, or FALSE = R_PosInf -> no centering, scaling preserves mean // for sd there is "within.sd" = R_NegInf, scaling by the frequency weighted within-group sd, default is 1, or scaling by a sd provided. // All other comments are in fvar.cpp (in C++ folder, not on Github) // [[Rcpp::export]] NumericVector fscaleCpp(const NumericVector& x, int ng = 0, const IntegerVector& g = 0, const SEXP& w = R_NilValue, bool narm = true, double set_mean = 0, double set_sd = 1) { // could set mean and sd with SEXP, but complicated... int l = x.size(); if(l < 1) return x; // Prevents seqfault for numeric(0) #101 NumericVector out = no_init_vector(l); // SHALLOW_DUPLICATE_ATTRIB(out, x); // Any speed loss or overwriting attributes ? if (Rf_isNull(w)) { // No weights if (ng == 0) { if(set_sd == R_NegInf) stop("within.sd can only be calculated when a grouping vector is supplied"); if(set_mean == R_NegInf) stop("without groups, centering on the overall mean amounts to scaling without centering, so use mean = FALSE instead, or supply a grouping vector to subtract out group means."); double n = 0, mean = 0, d1 = 0, M2 = 0; if(narm) { int j = l-1; while(std::isnan(x[j]) && j!=0) --j; if(j != 0) { for(int i = j+1; i--; ) { if(std::isnan(x[i])) continue; d1 = x[i]-mean; mean += d1 * (1 / ++n); M2 += d1*(x[i]-mean); } M2 = set_sd/sqrt(M2/(n-1)); // good ? -> Yes, works ! } else { // use goto to make code simpler ? std::fill(out.begin(), out.end(), NA_REAL); SHALLOW_DUPLICATE_ATTRIB(out, x); return out; } } else { for(int i = 0; i != l; ++i) { if(std::isnan(x[i])) { std::fill(out.begin(), out.end(), NA_REAL); SHALLOW_DUPLICATE_ATTRIB(out, x); return out; } else { d1 = x[i]-mean; mean += d1*(1 / ++n); M2 += d1*(x[i]-mean); } } M2 = set_sd/sqrt(M2/(l-1)); } if(std::isnan(M2)) { std::fill(out.begin(), out.end(), NA_REAL); } else { if(set_mean == 0) out = (x-mean)*M2; else if(set_mean == R_PosInf) out = (x-mean)*M2 + mean; // best ? // !R_FINITE(set_mean) else out = (x-mean)*M2 + set_mean; // best ? } } else { // with groups if(g.size() != l) stop("length(g) must match nrow(X)"); double d1 = 0, gl_mean = 0; // Best way of doing this ? How can you declare variables in global scope ? // NumericVector mean = narm ? no_init_vector(ng) : NumericVector(ng); // works but valgrind issue // NumericVector M2 = narm ? NumericVector(ng, NA_REAL) : NumericVector(ng); // NumericVector n = narm ? NumericVector(ng, 1.0) : NumericVector(ng); NumericVector mean(ng), n(ng, (narm) ? 1.0 : 0.0), M2(ng, (narm) ? NA_REAL : 0.0); if(narm) { for(int i = l; i--; ) { if(std::isnan(x[i])) continue; if(std::isnan(M2[g[i]-1])) { mean[g[i]-1] = x[i]; M2[g[i]-1] = 0; } else { d1 = x[i]-mean[g[i]-1]; mean[g[i]-1] += d1 * (1 / ++n[g[i]-1]); M2[g[i]-1] += d1*(x[i]-mean[g[i]-1]); } } } else { int ngs = 0; for(int i = 0; i != l; ++i) { if(std::isnan(M2[g[i]-1])) continue; if(std::isnan(x[i])) { M2[g[i]-1] = NA_REAL; ++ngs; if(ngs == ng) { std::fill(out.begin(), out.end(), NA_REAL); SHALLOW_DUPLICATE_ATTRIB(out, x); return out; } } else { d1 = x[i]-mean[g[i]-1]; mean[g[i]-1] += d1 * (1 / ++n[g[i]-1]); M2[g[i]-1] += d1*(x[i]-mean[g[i]-1]); } } } if(set_sd == R_NegInf) { double within_sd = 0; int sum_n = 0; if(set_mean == R_NegInf) { for(int i = ng; i--; ) { if(std::isnan(M2[i])) continue; within_sd += M2[i]; M2[i] = 1/sqrt(M2[i]/(n[i]-1)); gl_mean += mean[i]*n[i]; sum_n += n[i]; } gl_mean /= sum_n; } else { for(int i = ng; i--; ) { if(std::isnan(M2[i])) continue; within_sd += M2[i]; M2[i] = 1/sqrt(M2[i]/(n[i]-1)); sum_n += n[i]; } gl_mean = set_mean; } within_sd = sqrt(within_sd/(sum_n-1)); M2 = M2 * within_sd; // fastest ? } else { if(set_mean == R_NegInf) { int sum_n = 0; for(int i = ng; i--; ) { if(std::isnan(M2[i])) continue; M2[i] = set_sd/sqrt(M2[i]/(n[i]-1)); gl_mean += mean[i]*n[i]; sum_n += n[i]; } gl_mean /= sum_n; } else { gl_mean = set_mean; for(int i = ng; i--; ) if(!std::isnan(M2[i])) M2[i] = set_sd/sqrt(M2[i]/(n[i]-1)); } } if(set_mean == 0) { for(int i = 0; i != l; ++i) out[i] = (x[i]-mean[g[i]-1])*M2[g[i]-1]; } else if(set_mean == R_PosInf) { for(int i = 0; i != l; ++i) out[i] = (x[i]-mean[g[i]-1])*M2[g[i]-1] + mean[g[i]-1]; // best ? } else { for(int i = 0; i != l; ++i) out[i] = (x[i]-mean[g[i]-1])*M2[g[i]-1] + gl_mean; // best ? } } } else { // With weights NumericVector wg = w; if(l != wg.size()) stop("length(w) must match length(x)"); if (ng == 0) { if(set_sd == R_NegInf) stop("within.sd can only be calculated when a grouping vector is supplied"); if(set_mean == R_NegInf) stop("without groups, centering on the overall mean amounts to scaling without centering, so use mean = FALSE instead, or supply a grouping vector to subtract out group means."); double sumw = 0, mean = 0, M2 = 0, d1 = 0; if(narm) { int j = l-1; while((std::isnan(x[j]) || std::isnan(wg[j]) || wg[j] == 0) && j!=0) --j; if(j != 0) { for(int i = j+1; i--; ) { if(std::isnan(x[i]) || std::isnan(wg[i]) || wg[i] == 0) continue; sumw += wg[i]; d1 = x[i] - mean; mean += d1 * (wg[i] / sumw); M2 += wg[i] * d1 * (x[i] - mean); } } else { std::fill(out.begin(), out.end(), NA_REAL); SHALLOW_DUPLICATE_ATTRIB(out, x); return out; } } else { for(int i = 0; i != l; ++i) { if(std::isnan(x[i]) || std::isnan(wg[i])) { std::fill(out.begin(), out.end(), NA_REAL); SHALLOW_DUPLICATE_ATTRIB(out, x); return out; } else { if(wg[i] == 0) continue; sumw += wg[i]; d1 = x[i] - mean; mean += d1 * (wg[i] / sumw); M2 += wg[i] * d1 * (x[i] - mean); } } } M2 = set_sd/sqrt(M2/(sumw-1)); if(std::isnan(M2)) { std::fill(out.begin(), out.end(), NA_REAL); } else { if(set_mean == 0) out = (x-mean)*M2; else if(set_mean == R_PosInf) out = (x-mean)*M2 + mean; // best ? else out = (x-mean)*M2 + set_mean; // best ? } } else { // with groups if(g.size() != l) stop("length(g) must match nrow(X)"); double d1 = 0, gl_mean = 0; // Best way of doing this ? How can you declare variables in overall scope ? // NumericVector M2 = narm ? NumericVector(ng, NA_REAL) : NumericVector(ng); NumericVector M2(ng, (narm) ? NA_REAL : 0.0), mean(ng), sumw(ng); // = narm ? no_init_vector(ng) : NumericVector(ng); // works but valgrind issues // NumericVector sumw = narm ? no_init_vector(ng) : NumericVector(ng); if(narm) { for(int i = l; i--; ) { if(std::isnan(x[i]) || std::isnan(wg[i]) || wg[i] == 0) continue; if(std::isnan(M2[g[i]-1])) { sumw[g[i]-1] = wg[i]; mean[g[i]-1] = x[i]; M2[g[i]-1] = 0; } else { sumw[g[i]-1] += wg[i]; d1 = x[i] - mean[g[i]-1]; mean[g[i]-1] += d1 * (wg[i] / sumw[g[i]-1]); M2[g[i]-1] += wg[i] * d1 * (x[i] - mean[g[i]-1]); } } } else { int ngs = 0; for(int i = 0; i != l; ++i) { if(std::isnan(M2[g[i]-1])) continue; if(std::isnan(x[i]) || std::isnan(wg[i])) { M2[g[i]-1] = NA_REAL; ++ngs; if(ngs == ng) { std::fill(out.begin(), out.end(), NA_REAL); SHALLOW_DUPLICATE_ATTRIB(out, x); return out; } } else { if(wg[i] == 0) continue; sumw[g[i]-1] += wg[i]; d1 = x[i] - mean[g[i]-1]; mean[g[i]-1] += d1 * (wg[i] / sumw[g[i]-1]); M2[g[i]-1] += wg[i] * d1 * (x[i] - mean[g[i]-1]); } } } if(set_sd == R_NegInf) { double within_sd = 0, sum_sumw = 0; if(set_mean == R_NegInf) { for(int i = ng; i--; ) { if(std::isnan(M2[i])) continue; within_sd += M2[i]; M2[i] = 1/sqrt(M2[i]/(sumw[i]-1)); gl_mean += mean[i]*sumw[i]; sum_sumw += sumw[i]; } gl_mean /= sum_sumw; } else { for(int i = ng; i--; ) { if(std::isnan(M2[i])) continue; within_sd += M2[i]; M2[i] = 1/sqrt(M2[i]/(sumw[i]-1)); sum_sumw += sumw[i]; } gl_mean = set_mean; } within_sd = sqrt(within_sd/(sum_sumw-1)); M2 = M2 * within_sd; // fastest ? } else { if(set_mean == R_NegInf) { double sum_sumw = 0; for(int i = ng; i--; ) { if(std::isnan(M2[i])) continue; M2[i] = set_sd/sqrt(M2[i]/(sumw[i]-1)); gl_mean += mean[i]*sumw[i]; sum_sumw += sumw[i]; } gl_mean /= sum_sumw; } else { gl_mean = set_mean; for(int i = ng; i--; ) if(!std::isnan(M2[i])) M2[i] = set_sd/sqrt(M2[i]/(sumw[i]-1)); } } if(set_mean == 0) { for(int i = 0; i != l; ++i) out[i] = (x[i]-mean[g[i]-1])*M2[g[i]-1]; } else if(set_mean == R_PosInf) { for(int i = 0; i != l; ++i) out[i] = (x[i]-mean[g[i]-1])*M2[g[i]-1] + mean[g[i]-1]; // best ? } else { for(int i = 0; i != l; ++i) out[i] = (x[i]-mean[g[i]-1])*M2[g[i]-1] + gl_mean; // best ? } } } SHALLOW_DUPLICATE_ATTRIB(out, x); return out; } // [[Rcpp::export]] NumericMatrix fscalemCpp(const NumericMatrix& x, int ng = 0, const IntegerVector& g = 0, const SEXP& w = R_NilValue, bool narm = true, double set_mean = 0, double set_sd = 1) { int l = x.nrow(), col = x.ncol(); NumericMatrix out = no_init_matrix(l, col); if (Rf_isNull(w)) { // No weights if(ng == 0) { if(set_sd == R_NegInf) stop("within.sd can only be calculated when a grouping vector is supplied"); if(set_mean == R_NegInf) stop("without groups, centering on the overall mean amounts to scaling without centering, so use mean = FALSE instead, or supply a grouping vector to subtract out group means."); for(int j = col; j--; ) { NumericMatrix::ConstColumn column = x( _ , j); NumericMatrix::Column outj = out( _ , j); double nj = 0, meanj = 0, d1 = 0, M2j = 0; if(narm) { // faster using 2 loops over columns ? int k = l-1; while(std::isnan(column[k]) && k!=0) --k; if(k != 0) { for(int i = k+1; i--; ) { if(std::isnan(column[i])) continue; d1 = column[i]-meanj; meanj += d1 * (1 / ++nj); M2j += d1*(column[i]-meanj); } M2j = set_sd/sqrt(M2j/(nj-1)); } else { std::fill(outj.begin(), outj.end(), NA_REAL); continue; // Necessary } } else { for(int i = 0; i != l; ++i) { if(std::isnan(column[i])) { M2j = NA_REAL; break; } else { d1 = column[i]-meanj; meanj += d1 * (1 / ++nj); M2j += d1*(column[i]-meanj); } } M2j = set_sd/sqrt(M2j/(l-1)); } if(std::isnan(M2j)) { std::fill(outj.begin(), outj.end(), NA_REAL); } else { if(set_mean == 0) outj = (column-meanj)*M2j; else if(set_mean == R_PosInf) outj = (column-meanj)*M2j + meanj; // best ? else outj = (column-meanj)*M2j + set_mean; // best ? } } } else { // with groups if(g.size() != l) stop("length(g) must match nrow(X)"); // Better way ? NumericVector meanj(ng), nj(ng), M2j(ng); // NumericVector meanj = no_init_vector(ng), nj = no_init_vector(ng), M2j = no_init_vector(ng); // Works but valgrind issue for(int j = col; j--; ) { NumericMatrix::ConstColumn column = x( _ , j); NumericMatrix::Column outj = out( _ , j); double d1 = 0, gl_meanj = 0; if(narm) { // better do two loops ?? std::fill(M2j.begin(), M2j.end(), NA_REAL); for(int i = l; i--; ) { if(std::isnan(column[i])) continue; if(std::isnan(M2j[g[i]-1])) { meanj[g[i]-1] = column[i]; M2j[g[i]-1] = 0; nj[g[i]-1] = 1; } else { d1 = column[i]-meanj[g[i]-1]; meanj[g[i]-1] += d1 * (1 / ++nj[g[i]-1]); M2j[g[i]-1] += d1*(column[i]-meanj[g[i]-1]); } } } else { for(int i = ng; i--; ) meanj[i] = M2j[i] = nj[i] = 0; int ngs = 0; for(int i = 0; i != l; ++i) { if(std::isnan(M2j[g[i]-1])) continue; if(std::isnan(column[i])) { M2j[g[i]-1] = NA_REAL; ++ngs; if(ngs == ng) { std::fill(outj.begin(), outj.end(), NA_REAL); goto loopend; } } else { d1 = column[i]-meanj[g[i]-1]; meanj[g[i]-1] += d1 * (1 / ++nj[g[i]-1]); M2j[g[i]-1] += d1*(column[i]-meanj[g[i]-1]); } } } if(set_sd == R_NegInf) { // best way of coding ? Goes through all the if conditions for every column... double within_sdj = 0; int sum_nj = 0; if(set_mean == R_NegInf) { for(int i = ng; i--; ) { if(std::isnan(M2j[i])) continue; within_sdj += M2j[i]; M2j[i] = 1/sqrt(M2j[i]/(nj[i]-1)); gl_meanj += meanj[i]*nj[i]; sum_nj += nj[i]; } gl_meanj /= sum_nj; } else { for(int i = ng; i--; ) { if(std::isnan(M2j[i])) continue; within_sdj += M2j[i]; M2j[i] = 1/sqrt(M2j[i]/(nj[i]-1)); sum_nj += nj[i]; } gl_meanj = set_mean; } within_sdj = sqrt(within_sdj/(sum_nj-1)); M2j = M2j * within_sdj; // fastest ? } else { if(set_mean == R_NegInf) { int sum_nj = 0; for(int i = ng; i--; ) { if(std::isnan(M2j[i])) continue; M2j[i] = set_sd/sqrt(M2j[i]/(nj[i]-1)); gl_meanj += meanj[i]*nj[i]; sum_nj += nj[i]; } gl_meanj /= sum_nj; } else { gl_meanj = set_mean; for(int i = ng; i--; ) if(!std::isnan(M2j[i])) M2j[i] = set_sd/sqrt(M2j[i]/(nj[i]-1)); } } if(set_mean == 0) { for(int i = 0; i != l; ++i) outj[i] = (column[i]-meanj[g[i]-1])*M2j[g[i]-1]; } else if(set_mean == R_PosInf) { for(int i = 0; i != l; ++i) outj[i] = (column[i]-meanj[g[i]-1])*M2j[g[i]-1] + meanj[g[i]-1]; // best ? } else { for(int i = 0; i != l; ++i) outj[i] = (column[i]-meanj[g[i]-1])*M2j[g[i]-1] + gl_meanj; // best ? } loopend:; } } } else { // With weights NumericVector wg = w; if(l != wg.size()) stop("length(w) must match nrow(X)"); if(ng == 0) { if(set_sd == R_NegInf) stop("within.sd can only be calculated when a grouping vector is supplied"); if(set_mean == R_NegInf) stop("without groups, centering on the overall mean amounts to scaling without centering, so use mean = FALSE instead, or supply a grouping vector to subtract out group means."); for(int j = col; j--; ) { NumericMatrix::ConstColumn column = x( _ , j); NumericMatrix::Column outj = out( _ , j); double sumwj = 0, meanj = 0, M2j = 0, d1 = 0; if(narm) { int k = l-1; while((std::isnan(column[k]) || std::isnan(wg[k]) || wg[k] == 0) && k!=0) --k; if(k != 0) { for(int i = k+1; i--; ) { if(std::isnan(column[i]) || std::isnan(wg[i]) || wg[i] == 0) continue; sumwj += wg[i]; d1 = column[i] - meanj; meanj += d1 * (wg[i] / sumwj); M2j += wg[i] * d1 * (column[i] - meanj); } } else { std::fill(outj.begin(), outj.end(), NA_REAL); continue; // Necessary } } else { for(int i = 0; i != l; ++i) { if(std::isnan(column[i]) || std::isnan(wg[i])) { M2j = NA_REAL; break; } else { if(wg[i] == 0) continue; sumwj += wg[i]; d1 = column[i] - meanj; meanj += d1 * (wg[i] / sumwj); M2j += wg[i] * d1 * (column[i] - meanj); } } } M2j = set_sd/sqrt(M2j/(sumwj-1)); if(std::isnan(M2j)) { std::fill(outj.begin(), outj.end(), NA_REAL); } else { if(set_mean == 0) outj = (column-meanj)*M2j; else if(set_mean == R_PosInf) outj = (column-meanj)*M2j + meanj; // best ? else outj = (column-meanj)*M2j + set_mean; // best ? } } } else { // with groups and weights if(g.size() != l) stop("length(g) must match nrow(X)"); // Works but valgrind issue // NumericVector meanj = no_init_vector(ng), sumwj = no_init_vector(ng), M2j = no_init_vector(ng); NumericVector meanj(ng), sumwj(ng), M2j(ng); // better for valgrind for(int j = col; j--; ) { NumericMatrix::ConstColumn column = x( _ , j); NumericMatrix::Column outj = out( _ , j); double d1 = 0, gl_meanj = 0; if(narm) { std::fill(M2j.begin(), M2j.end(), NA_REAL); for(int i = l; i--; ) { if(std::isnan(column[i]) || std::isnan(wg[i]) || wg[i] == 0) continue; if(std::isnan(M2j[g[i]-1])) { sumwj[g[i]-1] = wg[i]; meanj[g[i]-1] = column[i]; M2j[g[i]-1] = 0; } else { sumwj[g[i]-1] += wg[i]; d1 = column[i] - meanj[g[i]-1]; meanj[g[i]-1] += d1 * (wg[i] / sumwj[g[i]-1]); M2j[g[i]-1] += wg[i] * d1 * (column[i] - meanj[g[i]-1]); } } } else { for(int i = ng; i--; ) meanj[i] = M2j[i] = sumwj[i] = 0; int ngs = 0; for(int i = 0; i != l; ++i) { if(std::isnan(M2j[g[i]-1])) continue; if(std::isnan(column[i]) || std::isnan(wg[i])) { M2j[g[i]-1] = NA_REAL; ++ngs; if(ngs == ng) { std::fill(outj.begin(), outj.end(), NA_REAL); goto loopend2; } } else { if(wg[i] == 0) continue; sumwj[g[i]-1] += wg[i]; d1 = column[i] - meanj[g[i]-1]; meanj[g[i]-1] += d1 * (wg[i] / sumwj[g[i]-1]); M2j[g[i]-1] += wg[i] * d1 * (column[i] - meanj[g[i]-1]); } } } if(set_sd == R_NegInf) { // best way of coding ? Goes through all the if conditions for every column... double within_sdj = 0, sum_sumwj = 0; if(set_mean == R_NegInf) { for(int i = ng; i--; ) { if(std::isnan(M2j[i])) continue; within_sdj += M2j[i]; M2j[i] = 1/sqrt(M2j[i]/(sumwj[i]-1)); gl_meanj += meanj[i]*sumwj[i]; sum_sumwj += sumwj[i]; } gl_meanj /= sum_sumwj; } else { for(int i = ng; i--; ) { if(std::isnan(M2j[i])) continue; within_sdj += M2j[i]; M2j[i] = 1/sqrt(M2j[i]/(sumwj[i]-1)); sum_sumwj += sumwj[i]; } gl_meanj = set_mean; } within_sdj = sqrt(within_sdj/(sum_sumwj-1)); M2j = M2j * within_sdj; // fastest ? } else { if(set_mean == R_NegInf) { double sum_sumwj = 0; for(int i = ng; i--; ) { if(std::isnan(M2j[i])) continue; M2j[i] = set_sd/sqrt(M2j[i]/(sumwj[i]-1)); gl_meanj += meanj[i]*sumwj[i]; sum_sumwj += sumwj[i]; } gl_meanj /= sum_sumwj; } else { gl_meanj = set_mean; for(int i = ng; i--; ) if(!std::isnan(M2j[i])) M2j[i] = set_sd/sqrt(M2j[i]/(sumwj[i]-1)); } } if(set_mean == 0) { for(int i = 0; i != l; ++i) outj[i] = (column[i]-meanj[g[i]-1])*M2j[g[i]-1]; } else if(set_mean == R_PosInf) { for(int i = 0; i != l; ++i) outj[i] = (column[i]-meanj[g[i]-1])*M2j[g[i]-1] + meanj[g[i]-1]; // best ? } else { for(int i = 0; i != l; ++i) outj[i] = (column[i]-meanj[g[i]-1])*M2j[g[i]-1] + gl_meanj; // best ? } loopend2:; } } } SHALLOW_DUPLICATE_ATTRIB(out, x); return out; } // [[Rcpp::export]] List fscalelCpp(const List& x, int ng = 0, const IntegerVector& g = 0, const SEXP& w = R_NilValue, bool narm = true, double set_mean = 0, double set_sd = 1) { int l = x.size(); List out(l); if (Rf_isNull(w)) { // No weights if(ng == 0) { if(set_sd == R_NegInf) stop("within.sd can only be calculated when a grouping vector is supplied"); if(set_mean == R_NegInf) stop("without groups, centering on the overall mean amounts to scaling without centering, so use mean = FALSE instead, or supply a grouping vector to subtract out group means."); for(int j = l; j--; ) { NumericVector column = x[j]; int row = column.size(); NumericVector outj = no_init_vector(row); double nj = 0, meanj = 0, d1 = 0, M2j = 0; if(narm) { int k = row-1; while(std::isnan(column[k]) && k!=0) --k; if(k != 0) { for(int i = k+1; i--; ) { if(std::isnan(column[i])) continue; d1 = column[i]-meanj; meanj += d1 * (1 / ++nj); M2j += d1*(column[i]-meanj); } M2j = set_sd/sqrt(M2j/(nj-1)); } else { std::fill(outj.begin(), outj.end(), NA_REAL); // outj = rep(NA_REAL, row); // fastest option ! (faster than std::fill) goto loopend; // Necessary } } else { for(int i = 0; i != row; ++i) { if(std::isnan(column[i])) { M2j = NA_REAL; break; } else { d1 = column[i]-meanj; meanj += d1 * (1 / ++nj); M2j += d1*(column[i]-meanj); } } M2j = set_sd/sqrt(M2j/(row-1)); } if(std::isnan(M2j)) { std::fill(outj.begin(), outj.end(), NA_REAL); } else { if(set_mean == 0) outj = (column-meanj)*M2j; else if(set_mean == R_PosInf) outj = (column-meanj)*M2j + meanj; // best ? else outj = (column-meanj)*M2j + set_mean; // best ? } loopend:; SHALLOW_DUPLICATE_ATTRIB(outj, column); out[j] = outj; } } else { // with groups int gss = g.size(); // Better way ? NumericVector meanj(ng), nj(ng), M2j(ng); // NumericVector meanj = no_init_vector(ng), nj = no_init_vector(ng), M2j = no_init_vector(ng); // Works but valgrind issue for(int j = l; j--; ) { NumericVector column = x[j]; if(gss != column.size()) stop("length(g) must match nrow(X)"); NumericVector outj = no_init_vector(gss); double d1 = 0, gl_meanj = 0; if(narm) { // better do two loops ? std::fill(M2j.begin(), M2j.end(), NA_REAL); for(int i = gss; i--; ) { if(std::isnan(column[i])) continue; if(std::isnan(M2j[g[i]-1])) { meanj[g[i]-1] = column[i]; M2j[g[i]-1] = 0; nj[g[i]-1] = 1; } else { d1 = column[i]-meanj[g[i]-1]; meanj[g[i]-1] += d1 * (1 / ++nj[g[i]-1]); M2j[g[i]-1] += d1*(column[i]-meanj[g[i]-1]); } } } else { for(int i = ng; i--; ) meanj[i] = M2j[i] = nj[i] = 0; int ngs = 0; for(int i = 0; i != gss; ++i) { if(std::isnan(M2j[g[i]-1])) continue; if(std::isnan(column[i])) { M2j[g[i]-1] = NA_REAL; ++ngs; if(ngs == ng) { std::fill(outj.begin(), outj.end(), NA_REAL); goto loopend2; } } else { d1 = column[i]-meanj[g[i]-1]; meanj[g[i]-1] += d1 * (1 / ++nj[g[i]-1]); M2j[g[i]-1] += d1*(column[i]-meanj[g[i]-1]); } } } if(set_sd == R_NegInf) { // best way of coding ? Goes through all the if conditions for every column... double within_sdj = 0; int sum_nj = 0; if(set_mean == R_NegInf) { for(int i = ng; i--; ) { if(std::isnan(M2j[i])) continue; within_sdj += M2j[i]; M2j[i] = 1/sqrt(M2j[i]/(nj[i]-1)); gl_meanj += meanj[i]*nj[i]; sum_nj += nj[i]; } gl_meanj /= sum_nj; } else { for(int i = ng; i--; ) { if(std::isnan(M2j[i])) continue; within_sdj += M2j[i]; M2j[i] = 1/sqrt(M2j[i]/(nj[i]-1)); sum_nj += nj[i]; } gl_meanj = set_mean; } within_sdj = sqrt(within_sdj/(sum_nj-1)); M2j = M2j * within_sdj; // fastest ? } else { if(set_mean == R_NegInf) { int sum_nj = 0; for(int i = ng; i--; ) { if(std::isnan(M2j[i])) continue; M2j[i] = set_sd/sqrt(M2j[i]/(nj[i]-1)); gl_meanj += meanj[i]*nj[i]; sum_nj += nj[i]; } gl_meanj /= sum_nj; } else { gl_meanj = set_mean; for(int i = ng; i--; ) if(!std::isnan(M2j[i])) M2j[i] = set_sd/sqrt(M2j[i]/(nj[i]-1)); } } if(set_mean == 0) { for(int i = 0; i != gss; ++i) outj[i] = (column[i]-meanj[g[i]-1])*M2j[g[i]-1]; } else if(set_mean == R_PosInf) { for(int i = 0; i != gss; ++i) outj[i] = (column[i]-meanj[g[i]-1])*M2j[g[i]-1] + meanj[g[i]-1]; // best ? } else { for(int i = 0; i != gss; ++i) outj[i] = (column[i]-meanj[g[i]-1])*M2j[g[i]-1] + gl_meanj; // best ? } loopend2:; SHALLOW_DUPLICATE_ATTRIB(outj, column); out[j] = outj; } } } else { // With weights NumericVector wg = w; int wgs = wg.size(); if(ng == 0) { if(set_sd == R_NegInf) stop("within.sd can only be calculated when a grouping vector is supplied"); if(set_mean == R_NegInf) stop("without groups, centering on the overall mean amounts to scaling without centering, so use mean = FALSE instead, or supply a grouping vector to subtract out group means."); for(int j = l; j--; ) { NumericVector column = x[j]; if(wgs != column.size()) stop("length(w) must match nrow(X)"); NumericVector outj = no_init_vector(wgs); double sumwj = 0, meanj = 0, M2j = 0, d1 = 0; if(narm) { int k = wgs-1; while((std::isnan(column[k]) || std::isnan(wg[k]) || wg[k] == 0) && k!=0) --k; if(k != 0) { for(int i = k+1; i--; ) { if(std::isnan(column[i]) || std::isnan(wg[i]) || wg[i] == 0) continue; sumwj += wg[i]; d1 = column[i] - meanj; meanj += d1 * (wg[i] / sumwj); M2j += wg[i] * d1 * (column[i] - meanj); } } else { std::fill(outj.begin(), outj.end(), NA_REAL); goto loopend3; // Necessary } } else { for(int i = 0; i != wgs; ++i) { if(std::isnan(column[i]) || std::isnan(wg[i])) { M2j = NA_REAL; break; } else { if(wg[i] == 0) continue; sumwj += wg[i]; d1 = column[i] - meanj; meanj += d1 * (wg[i] / sumwj); M2j += wg[i] * d1 * (column[i] - meanj); } } } M2j = set_sd/sqrt(M2j/(sumwj-1)); if(std::isnan(M2j)) { std::fill(outj.begin(), outj.end(), NA_REAL); } else { if(set_mean == 0) outj = (column-meanj)*M2j; else if(set_mean == R_PosInf) outj = (column-meanj)*M2j + meanj; // best ? else outj = (column-meanj)*M2j + set_mean; // best ? } loopend3:; SHALLOW_DUPLICATE_ATTRIB(outj, column); out[j] = outj; } } else { // with groups and weights int gss = g.size(); if(gss != wgs) stop("length(w) must match length(g)"); NumericVector meanj(ng), sumwj(ng), M2j(ng); // NumericVector meanj = no_init_vector(ng), sumwj = no_init_vector(ng), M2j = no_init_vector(ng); // Works but valgrind issue for(int j = l; j--; ) { NumericVector column = x[j]; if(gss != column.size()) stop("length(g) must match nrow(X)"); NumericVector outj = no_init_vector(gss); double d1 = 0, gl_meanj = 0; if(narm) { std::fill(M2j.begin(), M2j.end(), NA_REAL); for(int i = gss; i--; ) { if(std::isnan(column[i]) || std::isnan(wg[i]) || wg[i] == 0) continue; if(std::isnan(M2j[g[i]-1])) { sumwj[g[i]-1] = wg[i]; meanj[g[i]-1] = column[i]; M2j[g[i]-1] = 0; } else { sumwj[g[i]-1] += wg[i]; d1 = column[i] - meanj[g[i]-1]; meanj[g[i]-1] += d1 * (wg[i] / sumwj[g[i]-1]); M2j[g[i]-1] += wg[i] * d1 * (column[i] - meanj[g[i]-1]); } } } else { for(int i = ng; i--; ) meanj[i] = M2j[i] = sumwj[i] = 0; int ngs = 0; for(int i = 0; i != gss; ++i) { if(std::isnan(M2j[g[i]-1])) continue; if(std::isnan(column[i]) || std::isnan(wg[i])) { M2j[g[i]-1] = NA_REAL; ++ngs; if(ngs == ng) { std::fill(outj.begin(), outj.end(), NA_REAL); goto loopend4; } } else { if(wg[i] == 0) continue; sumwj[g[i]-1] += wg[i]; d1 = column[i] - meanj[g[i]-1]; meanj[g[i]-1] += d1 * (wg[i] / sumwj[g[i]-1]); M2j[g[i]-1] += wg[i] * d1 * (column[i] - meanj[g[i]-1]); } } } if(set_sd == R_NegInf) { // best way of coding ? Goes through all the if conditions for every column... double within_sdj = 0, sum_sumwj = 0; if(set_mean == R_NegInf) { for(int i = ng; i--; ) { if(std::isnan(M2j[i])) continue; within_sdj += M2j[i]; M2j[i] = 1/sqrt(M2j[i]/(sumwj[i]-1)); gl_meanj += meanj[i]*sumwj[i]; sum_sumwj += sumwj[i]; } gl_meanj /= sum_sumwj; } else { for(int i = ng; i--; ) { if(std::isnan(M2j[i])) continue; within_sdj += M2j[i]; M2j[i] = 1/sqrt(M2j[i]/(sumwj[i]-1)); sum_sumwj += sumwj[i]; } gl_meanj = set_mean; } within_sdj = sqrt(within_sdj/(sum_sumwj-1)); M2j = M2j * within_sdj; // fastest ? } else { if(set_mean == R_NegInf) { double sum_sumwj = 0; for(int i = ng; i--; ) { if(std::isnan(M2j[i])) continue; M2j[i] = set_sd/sqrt(M2j[i]/(sumwj[i]-1)); gl_meanj += meanj[i]*sumwj[i]; sum_sumwj += sumwj[i]; } gl_meanj /= sum_sumwj; } else { gl_meanj = set_mean; for(int i = ng; i--; ) if(!std::isnan(M2j[i])) M2j[i] = set_sd/sqrt(M2j[i]/(sumwj[i]-1)); } } if(set_mean == 0) { for(int i = 0; i != gss; ++i) outj[i] = (column[i]-meanj[g[i]-1])*M2j[g[i]-1]; } else if(set_mean == R_PosInf) { for(int i = 0; i != gss; ++i) outj[i] = (column[i]-meanj[g[i]-1])*M2j[g[i]-1] + meanj[g[i]-1]; // best ? } else { for(int i = 0; i != gss; ++i) outj[i] = (column[i]-meanj[g[i]-1])*M2j[g[i]-1] + gl_meanj; // best ? } loopend4:; SHALLOW_DUPLICATE_ATTRIB(outj, column); out[j] = outj; } } } SHALLOW_DUPLICATE_ATTRIB(out, x); return out; } collapse/src/fnth_fmedian.cpp0000644000176200001440000007005514174223734016007 0ustar liggesusers// [[Rcpp::plugins(cpp11)]] #include #define STRICT_R_HEADERS #include #include using namespace Rcpp; extern "C" { #include "base_radixsort.h" } auto isnan2 = [](double x) { return x != x; }; auto nisnan = [](double x) { return x == x; }; //[[Rcpp::export]] NumericVector fnthCpp(const NumericVector& x, double Q = 0.5, int ng = 0, const IntegerVector& g = 0, const SEXP& gs = R_NilValue, const SEXP& w = R_NilValue, bool narm = true, int ret = 1) { int l = x.size(); if(l < 1) return x; // Prevents seqfault for numeric(0) #101 bool tiesmean, lower; if(Q <= 0 || Q == 1) stop("n needs to be between 0 and 1, or between 1 and length(x). Use fmin and fmax for minima and maxima."); if(Q > 1) { tiesmean = false; lower = true; if(ng == 0) { if(Q >= l) stop("n needs to be between 0 and 1, or between 1 and length(x). Use fmin and fmax for minima and maxima."); Q = (Q-1)/(l-1); } else { if(Q >= l/ng) stop("n needs to be between 0 and 1, or between 1 and the length(x)/ng, with ng the number of groups. Use fmin and fmax for minima and maxima."); Q = (Q-1)/(l/ng-1); } } else { tiesmean = ret == 1; lower = ret != 3; } if(Rf_isNull(w)) { if(ng == 0) { NumericVector out(1); if(narm) { NumericVector xd = no_init_vector(l); auto pend = std::remove_copy_if(x.begin(), x.end(), xd.begin(), isnan2); int sz = pend - xd.begin(), nth = lower ? (sz-1)*Q : sz*Q; // return NumericVector::create(sz, (sz-1)*Q, sz*Q-1, sz*Q); if(sz == 0) return Rf_ScalarReal(x[0]); std::nth_element(xd.begin(), xd.begin()+nth, pend); out = (tiesmean && sz%2 == 0) ? (xd[nth] + *(std::min_element(xd.begin()+nth+1, pend)))*0.5 : xd[nth]; } else { for(int i = 0; i != l; ++i) if(isnan2(x[i])) return Rf_ScalarReal(x[i]); NumericVector xd = Rf_duplicate(x); int nth = lower ? (l-1)*Q : l*Q; std::nth_element(xd.begin(), xd.begin()+nth, xd.end()); out = (tiesmean && l%2 == 0) ? (xd[nth] + *(std::min_element(xd.begin()+nth+1, xd.end())))*0.5 : xd[nth]; } return out; } else { // with groups if(l != g.size()) stop("length(g) must match length(x)"); int ngp = ng+1; std::vector > gmap(ngp); std::vector gcount(ngp); if(Rf_isNull(gs)) { for(int i = 0; i != l; ++i) ++gcount[g[i]]; for(int i = 1; i != ngp; ++i) { if(gcount[i] == 0) stop("Group size of 0 encountered. This is probably because of unused factor levels. Use fdroplevels(f) to drop them."); gmap[i] = std::vector (gcount[i]); gcount[i] = 0; } } else { IntegerVector gsv = gs; if(ng != gsv.size()) stop("ng must match length(gs)"); for(int i = 0; i != ng; ++i) { if(gsv[i] == 0) stop("Group size of 0 encountered. This is probably because of unused factor levels. Use fdroplevels(f) to drop them."); gmap[i+1] = std::vector (gsv[i]); } } if(narm) { NumericVector out(ng, NA_REAL); for(int i = 0; i != l; ++i) if(nisnan(x[i])) gmap[g[i]][gcount[g[i]]++] = x[i]; for(int i = 1; i != ngp; ++i) { if(gcount[i] != 0) { int n = gcount[i], nth = lower ? (n-1)*Q : n*Q; auto begin = gmap[i].begin(), mid = begin + nth, end = begin + n; std::nth_element(begin, mid, end); out[i-1] = (tiesmean && n%2 == 0) ? (*(mid) + *(std::min_element(mid+1, end)))*0.5 : *(mid); } } if(!Rf_isObject(x)) Rf_copyMostAttrib(x, out); return out; } else { NumericVector out(ng); int ngs = 0; for(int i = 0; i != l; ++i) { if(isnan2(x[i])) { if(nisnan(out[g[i]-1])) { out[g[i]-1] = NA_REAL; ++ngs; if(ngs == ng) break; } } else { gmap[g[i]][gcount[g[i]]++] = x[i]; } } for(int i = 0; i != ng; ++i) { if(isnan2(out[i])) continue; int n = gcount[i+1], nth = lower ? (n-1)*Q : n*Q; auto begin = gmap[i+1].begin(), mid = begin + nth, end = begin + n; std::nth_element(begin, mid, end); out[i] = (tiesmean && n%2 == 0) ? (*(mid) + *(std::min_element(mid+1, end)))*0.5 : *(mid); } if(!Rf_isObject(x)) Rf_copyMostAttrib(x, out); return out; } } } else { // with weights NumericVector wg = w; if(l != wg.size()) stop("length(w) must match length(x)"); IntegerVector o = no_init_vector(l); int *ord = INTEGER(o); Cdoubleradixsort(ord, TRUE, FALSE, x); // starts from 1 if(ng == 0) { double wsumQ = 0, wsum = wg[o[0]-1]; int k = 1; if(narm) { for(int i = 0; i != l; ++i) if(nisnan(x[i])) wsumQ += wg[i]; // && nisnan(wg[i]) if(wsumQ == 0) return Rf_ScalarReal(NA_REAL); wsumQ *= Q; } else { if(isnan2(x[o[l-1]-1])) return Rf_ScalarReal(NA_REAL); wsumQ = std::accumulate(wg.begin(), wg.end(), 0.0) * Q; } if(isnan2(wsumQ)) stop("Missing weights in order statistics are currently only supported if x is also missing"); // return Rf_ScalarReal(NA_REAL); if(lower) { while(wsum < wsumQ) wsum += wg[o[k++]-1]; if(tiesmean && wsum == wsumQ) { double out = x[o[k-1]-1], n = 2; while(wg[o[k]-1] == 0) { out += x[o[k++]-1]; ++n; } return Rf_ScalarReal((out + x[o[k]-1]) / n); } } else { while(wsum <= wsumQ) wsum += wg[o[k++]-1]; } return Rf_ScalarReal(x[o[k-1]-1]); } else { // with groups and weights if(l != g.size()) stop("length(g) must match length(x)"); NumericVector wsumQ(ng), wsum(ng), out(ng, NA_REAL); if(narm) { for(int i = 0; i != l; ++i) if(nisnan(x[i])) wsumQ[g[i]-1] += wg[i]; // && nisnan(wg[i]) for(int i = ng; i--; ) { if(isnan2(wsumQ[i])) stop("Missing weights in order statistics are currently only supported if x is also missing"); wsumQ[i] *= Q; } } else { for(int i = 0; i != l; ++i) { if(nisnan(x[i])) { wsumQ[g[i]-1] += wg[i]; } else { wsum[g[i]-1] = DBL_MAX; // OK ? // needed ? // = wsumQ[g[i]-1] } } for(int i = ng; i--; ) { if(isnan2(wsumQ[i]) && wsum[i] != DBL_MAX) stop("Missing weights in order statistics are currently only supported if x is also missing"); wsumQ[i] *= Q; } } // wsumQ = wsumQ * Q; int gi, oi; if(tiesmean) { std::vector seen(ng); std::vector n(ng, 1); // only needed for 0 weights. Could check above if there are any. for(int i = 0; i != l; ++i) { oi = o[i]-1; gi = g[oi]-1; if(seen[gi]) continue; if(wsum[gi] < wsumQ[gi]) out[gi] = x[oi]; else { if(wsum[gi] > wsumQ[gi]) { seen[gi] = true; continue; } out[gi] += (x[oi]-out[gi])/++n[gi]; // https://stackoverflow.com/questions/28820904/how-to-efficiently-compute-average-on-the-fly-moving-average } wsum[gi] += wg[oi]; } } else if(lower) { for(int i = 0; i != l; ++i) { oi = o[i]-1; gi = g[oi]-1; if(wsum[gi] < wsumQ[gi]) { wsum[gi] += wg[oi]; out[gi] = x[oi]; } } } else { for(int i = 0; i != l; ++i) { oi = o[i]-1; gi = g[oi]-1; if(wsum[gi] <= wsumQ[gi]) { wsum[gi] += wg[oi]; out[gi] = x[oi]; } } } if(!Rf_isObject(x)) Rf_copyMostAttrib(x, out); return out; } } } //[[Rcpp::export]] SEXP fnthmCpp(const NumericMatrix& x, double Q = 0.5, int ng = 0, const IntegerVector& g = 0, const SEXP& gs = R_NilValue, const SEXP& w = R_NilValue, bool narm = true, bool drop = true, int ret = 1) { int l = x.nrow(), col = x.ncol(); bool tiesmean, lower; if(Q <= 0 || Q == 1) stop("n needs to be between 0 and 1, or between 1 and nrow(x). Use fmin and fmax for minima and maxima."); if(Q > 1) { tiesmean = false; lower = true; if(ng == 0) { if(Q >= l) stop("n needs to be between 0 and 1, or between 1 and nrow(x). Use fmin and fmax for minima and maxima."); Q = (Q-1)/(l-1); } else { if(Q >= l/ng) stop("n needs to be between 0 and 1, or between 1 and the nrow(x)/ng, with ng the number of groups. Use fmin and fmax for minima and maxima."); Q = (Q-1)/(l/ng-1); } } else { tiesmean = ret == 1; lower = ret != 3; } if(Rf_isNull(w)) { if(ng == 0) { NumericVector out = no_init_vector(col); if(narm) { NumericVector column = no_init_vector(l); auto begin = column.begin(); for(int j = col; j--; ) { NumericMatrix::ConstColumn colj = x( _ , j); auto pend = std::remove_copy_if(colj.begin(), colj.end(), begin, isnan2); int sz = pend - begin, nth = lower ? (sz-1)*Q : sz*Q; if(sz == 0) { out[j] = colj[0]; continue; } std::nth_element(begin, begin+nth, pend); out[j] = (tiesmean && sz%2 == 0) ? (column[nth] + *(std::min_element(begin+nth+1, pend)))*0.5 : column[nth]; } } else { int nth = lower ? (l-1)*Q : l*Q; bool tm = tiesmean && l%2 == 0; for(int j = col; j--; ) { { NumericMatrix::ConstColumn colj = x( _ , j); for(int i = 0; i != l; ++i) { if(isnan2(colj[i])) { out[j] = colj[i]; goto endloop; } } NumericVector column = Rf_duplicate(wrap(colj)); // best ? std::nth_element(column.begin(), column.begin()+nth, column.end()); out[j] = tm ? (column[nth] + *(std::min_element(column.begin()+nth+1, column.end())))*0.5 : column[nth]; } endloop:; } } if(drop) Rf_setAttrib(out, R_NamesSymbol, colnames(x)); else { Rf_dimgets(out, Dimension(1, col)); colnames(out) = colnames(x); if(!Rf_isObject(x)) Rf_copyMostAttrib(x, out); } return out; } else { // with groups if(l != g.size()) stop("length(g) must match nrow(x)"); int ngp = ng+1; std::vector > gmap(ngp); std::vector gcount(ngp); if(Rf_isNull(gs)) { for(int i = 0; i != l; ++i) ++gcount[g[i]]; for(int i = 1; i != ngp; ++i) { if(gcount[i] == 0) stop("Group size of 0 encountered. This is probably because of unused factor levels. Use fdroplevels(f) to drop them."); gmap[i] = std::vector (gcount[i]); } } else { IntegerVector gsv = gs; if(ng != gsv.size()) stop("ng must match length(gs)"); for(int i = 0; i != ng; ++i) { if(gsv[i] == 0) stop("Group size of 0 encountered. This is probably because of unused factor levels. Use fdroplevels(f) to drop them."); gmap[i+1] = std::vector (gsv[i]); } } if(narm) { NumericMatrix out = no_init_matrix(ng, col); std::fill(out.begin(), out.end(), NA_REAL); for(int j = col; j--; ) { NumericMatrix::ConstColumn column = x( _ , j); NumericMatrix::Column nthj = out( _ , j); gcount.assign(ngp, 0); for(int i = 0; i != l; ++i) if(nisnan(column[i])) gmap[g[i]][gcount[g[i]]++] = column[i]; for(int i = 1; i != ngp; ++i) { if(gcount[i] != 0) { int n = gcount[i], nth = lower ? (n-1)*Q : n*Q; auto begin = gmap[i].begin(), mid = begin + nth, end = begin + n; std::nth_element(begin, mid, end); nthj[i-1] = (tiesmean && n%2 == 0) ? (*(mid) + *(std::min_element(mid+1, end)))*0.5 : *(mid); } } } colnames(out) = colnames(x); if(!Rf_isObject(x)) Rf_copyMostAttrib(x, out); return out; } else { NumericMatrix out(ng, col); // no init numerically unstable for(int j = col; j--; ) { NumericMatrix::ConstColumn column = x( _ , j); NumericMatrix::Column nthj = out( _ , j); gcount.assign(ngp, 0); int ngs = 0; for(int i = 0; i != l; ++i) { if(isnan2(column[i])) { if(nisnan(nthj[g[i]-1])) { nthj[g[i]-1] = NA_REAL; ++ngs; if(ngs == ng) break; } } else { gmap[g[i]][gcount[g[i]]++] = column[i]; } } for(int i = 0; i != ng; ++i) { if(isnan2(nthj[i])) continue; int n = gcount[i+1], nth = lower ? (n-1)*Q : n*Q; auto begin = gmap[i+1].begin(), mid = begin + nth, end = begin + n; std::nth_element(begin, mid, end); nthj[i] = (tiesmean && n%2 == 0) ? (*(mid) + *(std::min_element(mid+1, end)))*0.5 : *(mid); } } colnames(out) = colnames(x); if(!Rf_isObject(x)) Rf_copyMostAttrib(x, out); return out; } } } else { // with weights NumericVector wg = w; if(l != wg.size()) stop("length(w) must match nrow(x)"); IntegerVector o = no_init_vector(l); int *ord = INTEGER(o); if(ng == 0) { NumericVector out = no_init_vector(col); double wsumQ = 0; if(!narm) { wsumQ = std::accumulate(wg.begin(), wg.end(), 0.0) * Q; if(isnan2(wsumQ)) { stop("Missing weights in order statistics are currently only supported if x is also missing"); // std::fill(out.begin(), out.end(), NA_REAL); // goto outnth; } } for(int j = col; j--; ) { NumericMatrix::ConstColumn column = x( _ , j); Cdoubleradixsort(ord, TRUE, FALSE, wrap(column)); // starts from 1.... if(narm) { wsumQ = 0; for(int i = 0; i != l; ++i) if(nisnan(column[i])) wsumQ += wg[i]; // && nisnan(wg[i]) if(wsumQ == 0) { out[j] = NA_REAL; continue; } if(isnan2(wsumQ)) stop("Missing weights in order statistics are currently only supported if x is also missing"); wsumQ *= Q; } else { if(isnan2(column[o[l-1]-1])) { out[j] = NA_REAL; continue; } } double wsum = wg[o[0]-1]; int k = 1; // what about all missing ? -> gives dsort error... if(lower) { while(wsum < wsumQ) wsum += wg[o[k++]-1]; if(tiesmean && wsum == wsumQ) { double outtmp = column[o[k-1]-1], n = 2; while(wg[o[k]-1] == 0) { outtmp += column[o[k++]-1]; ++n; } out[j] = (outtmp + column[o[k]-1]) / n; continue; } } else { while(wsum <= wsumQ) wsum += wg[o[k++]-1]; } out[j] = column[o[k-1]-1]; } // outnth: if(drop) Rf_setAttrib(out, R_NamesSymbol, colnames(x)); else { Rf_dimgets(out, Dimension(1, col)); colnames(out) = colnames(x); if(!Rf_isObject(x)) Rf_copyMostAttrib(x, out); } return out; } else { // with groups and weights if(l != g.size()) stop("length(g) must match nrow(x)"); int gi, oi; NumericMatrix out = no_init_matrix(ng, col); std::fill(out.begin(), out.end(), NA_REAL); NumericVector wsumQ(ng); if(!narm) { for(int i = 0; i != l; ++i) wsumQ[g[i]-1] += wg[i]; wsumQ = wsumQ * Q; } for(int j = col; j--; ) { NumericMatrix::ConstColumn column = x( _ , j); NumericMatrix::Column nthj = out( _ , j); Cdoubleradixsort(ord, TRUE, FALSE, wrap(column)); NumericVector wsum(ng); if(narm) { std::fill(wsumQ.begin(), wsumQ.end(), 0.0); for(int i = 0; i != l; ++i) if(nisnan(column[i])) wsumQ[g[i]-1] += wg[i]; // && nisnan(wg[i]) for(int i = ng; i--; ) { if(isnan2(wsumQ[i])) stop("Missing weights in order statistics are currently only supported if x is also missing"); wsumQ[i] *= Q; } } else { for(int i = 0; i != l; ++i) if(isnan2(column[i])) wsum[g[i]-1] = DBL_MAX; // OK ?? // needed?? for(int i = ng; i--; ) if(isnan2(wsumQ[i]) && wsum[i] != DBL_MAX) stop("Missing weights in order statistics are currently only supported if x is also missing"); } if(tiesmean) { std::vector seen(ng); std::vector n(ng, 1); // only needed for 0 weights. Could check above if there are any. for(int i = 0; i != l; ++i) { oi = o[i]-1; gi = g[oi]-1; if(seen[gi]) continue; if(wsum[gi] < wsumQ[gi]) nthj[gi] = column[oi]; else { if(wsum[gi] > wsumQ[gi]) { seen[gi] = true; continue; } nthj[gi] += (column[oi]-nthj[gi])/++n[gi]; } wsum[gi] += wg[oi]; } } else if(lower) { for(int i = 0; i != l; ++i) { oi = o[i]-1; gi = g[oi]-1; if(wsum[gi] < wsumQ[gi]) { wsum[gi] += wg[oi]; nthj[gi] = column[oi]; } } } else { for(int i = 0; i != l; ++i) { oi = o[i]-1; gi = g[oi]-1; if(wsum[gi] <= wsumQ[gi]) { wsum[gi] += wg[oi]; nthj[gi] = column[oi]; } } } } colnames(out) = colnames(x); if(!Rf_isObject(x)) Rf_copyMostAttrib(x, out); return out; } } } //[[Rcpp::export]] SEXP fnthlCpp(const List& x, double Q = 0.5, int ng = 0, const IntegerVector& g = 0, const SEXP& gs = R_NilValue, const SEXP& w = R_NilValue, bool narm = true, bool drop = true, int ret = 1) { int l = x.size(), lx1 = Rf_length(x[0]); bool tiesmean, lower; if(Q <= 0 || Q == 1) stop("n needs to be between 0 and 1, or between 1 and nrow(x). Use fmin and fmax for minima and maxima."); if(Q > 1) { tiesmean = false; lower = true; if(ng == 0) { if(Q >= lx1) stop("n needs to be between 0 and 1, or between 1 and nrow(x). Use fmin and fmax for minima and maxima."); Q = (Q-1)/(lx1-1); } else { if(Q >= lx1/ng) stop("n needs to be between 0 and 1, or between 1 and the nrow(x)/ng, with ng the number of groups. Use fmin and fmax for minima and maxima."); Q = (Q-1)/(lx1/ng-1); } } else { tiesmean = ret == 1; lower = ret != 3; } if(Rf_isNull(w)) { if(ng == 0) { NumericVector out = no_init_vector(l); if(narm) { for(int j = l; j--; ) { NumericVector colj = x[j]; NumericVector column = no_init_vector(colj.size()); auto begin = column.begin(); auto pend = std::remove_copy_if(colj.begin(), colj.end(), begin, isnan2); int sz = pend - begin, nth = lower ? (sz-1)*Q : sz*Q; if(sz == 0) { out[j] = colj[0]; continue; } std::nth_element(begin, begin+nth, pend); out[j] = (tiesmean && sz%2 == 0) ? (column[nth] + *(std::min_element(begin+nth+1, pend)))*0.5 : column[nth]; } } else { for(int j = l; j--; ) { { NumericVector colj = x[j]; int row = colj.size(), nth = lower ? (row-1)*Q : row*Q; for(int i = 0; i != row; ++i) { if(isnan2(colj[i])) { out[j] = colj[i]; goto endloop; } } NumericVector column = Rf_duplicate(colj); auto begin = column.begin(); std::nth_element(begin, begin+nth, column.end()); out[j] = (tiesmean && row%2 == 0) ? (column[nth] + *(std::min_element(begin+nth+1, column.end())))*0.5 : column[nth]; } endloop:; } } if(drop) { Rf_setAttrib(out, R_NamesSymbol, Rf_getAttrib(x, R_NamesSymbol)); return out; } else { List outl(l); for(int j = l; j--; ) { outl[j] = out[j]; SHALLOW_DUPLICATE_ATTRIB(outl[j], x[j]); } SHALLOW_DUPLICATE_ATTRIB(outl, x); Rf_setAttrib(outl, R_RowNamesSymbol, Rf_ScalarInteger(1)); return outl; } } else { // with groups if(lx1 != g.size()) stop("length(g) must match nrow(x)"); List out(l); int ngp = ng+1; std::vector > gmap(ngp); std::vector gcount(ngp); if(Rf_isNull(gs)) { for(int i = 0; i != lx1; ++i) ++gcount[g[i]]; for(int i = 1; i != ngp; ++i) { if(gcount[i] == 0) stop("Group size of 0 encountered. This is probably because of unused factor levels. Use fdroplevels(f) to drop them."); gmap[i] = std::vector (gcount[i]); } } else { IntegerVector gsv = gs; if(ng != gsv.size()) stop("ng must match length(gs)"); for(int i = 0; i != ng; ++i) { if(gsv[i] == 0) stop("Group size of 0 encountered. This is probably because of unused factor levels. Use fdroplevels(f) to drop them."); gmap[i+1] = std::vector (gsv[i]); } } if(narm) { for(int j = l; j--; ) { NumericVector column = x[j]; NumericVector nthj(ng, NA_REAL); if(lx1 != column.size()) stop("length(g) must match nrow(x)"); gcount.assign(ngp, 0); for(int i = 0; i != lx1; ++i) if(nisnan(column[i])) gmap[g[i]][gcount[g[i]]++] = column[i]; for(int i = 1; i != ngp; ++i) { if(gcount[i] != 0) { int n = gcount[i], nth = lower ? (n-1)*Q : n*Q; auto begin = gmap[i].begin(), mid = begin + nth, end = begin + n; std::nth_element(begin, mid, end); nthj[i-1] = (tiesmean && n%2 == 0) ? (*(mid) + *(std::min_element(mid+1, end)))*0.5 : *(mid); } } SHALLOW_DUPLICATE_ATTRIB(nthj, column); out[j] = nthj; } } else { for(int j = l; j--; ) { NumericVector column = x[j]; NumericVector nthj(ng); // no init numerically unstable if(lx1 != column.size()) stop("length(g) must match nrow(x)"); gcount.assign(ngp, 0); int ngs = 0; for(int i = 0; i != lx1; ++i) { if(isnan2(column[i])) { if(nisnan(nthj[g[i]-1])) { nthj[g[i]-1] = NA_REAL; ++ngs; if(ngs == ng) break; } } else { gmap[g[i]][gcount[g[i]]++] = column[i]; } } for(int i = 0; i != ng; ++i) { if(isnan2(nthj[i])) continue; int n = gcount[i+1], nth = lower ? (n-1)*Q : n*Q; auto begin = gmap[i+1].begin(), mid = begin + nth, end = begin + n; std::nth_element(begin, mid, end); nthj[i] = (tiesmean && n%2 == 0) ? (*(mid) + *(std::min_element(mid+1, end)))*0.5 : *(mid); } SHALLOW_DUPLICATE_ATTRIB(nthj, column); out[j] = nthj; } } SHALLOW_DUPLICATE_ATTRIB(out, x); Rf_setAttrib(out, R_RowNamesSymbol, IntegerVector::create(NA_INTEGER, -ng)); return out; } } else { // with weights NumericVector wg = w; if(lx1 != wg.size()) stop("length(w) must match nrow(x)"); IntegerVector o = no_init_vector(lx1); int *ord = INTEGER(o); if(ng == 0) { NumericVector out = no_init_vector(l); double wsumQ = 0; if(!narm) { wsumQ = std::accumulate(wg.begin(), wg.end(), 0.0) * Q; if(isnan2(wsumQ)) { stop("Missing weights in order statistics are currently only supported if x is also missing"); // std::fill(out.begin(), out.end(), NA_REAL); // goto outnth; } } for(int j = l; j--; ) { NumericVector column = x[j]; if(lx1 != column.size()) stop("length(w) must match nrow(x)"); Cdoubleradixsort(ord, TRUE, FALSE, column); // starts from 1 if(narm) { wsumQ = 0; for(int i = 0; i != lx1; ++i) if(nisnan(column[i])) wsumQ += wg[i]; // && nisnan(wg[i]) if(wsumQ == 0) { out[j] = NA_REAL; continue; } if(isnan2(wsumQ)) stop("Missing weights in order statistics are currently only supported if x is also missing"); wsumQ *= Q; } else { if(isnan2(column[o[lx1-1]-1])) { out[j] = NA_REAL; continue; } } double wsum = wg[o[0]-1]; int k = 1; // what about all missing ? -> gives dsort error... if(lower) { while(wsum < wsumQ) wsum += wg[o[k++]-1]; if(tiesmean && wsum == wsumQ) { double outtmp = column[o[k-1]-1], n = 2; while(wg[o[k]-1] == 0) { outtmp += column[o[k++]-1]; ++n; } out[j] = (outtmp + column[o[k]-1]) / n; continue; } } else { while(wsum <= wsumQ) wsum += wg[o[k++]-1]; } out[j] = column[o[k-1]-1]; } // outnth: if(drop) { Rf_setAttrib(out, R_NamesSymbol, Rf_getAttrib(x, R_NamesSymbol)); return out; } else { List outl(l); for(int j = l; j--; ) { outl[j] = out[j]; SHALLOW_DUPLICATE_ATTRIB(outl[j], x[j]); } SHALLOW_DUPLICATE_ATTRIB(outl, x); Rf_setAttrib(outl, R_RowNamesSymbol, Rf_ScalarInteger(1)); return outl; } } else { // with groups and weights if(lx1 != g.size()) stop("length(w) must match length(g)"); int gi, oi; List out(l); NumericVector wsumQ(ng); if(!narm) { for(int i = 0; i != lx1; ++i) wsumQ[g[i]-1] += wg[i]; wsumQ = wsumQ * Q; } for(int j = l; j--; ) { NumericVector column = x[j]; if(lx1 != column.size()) stop("length(w) must match nrow(x)"); Cdoubleradixsort(ord, TRUE, FALSE, column); NumericVector wsum(ng), nthj(ng, NA_REAL); if(narm) { std::fill(wsumQ.begin(), wsumQ.end(), 0.0); for(int i = 0; i != lx1; ++i) if(nisnan(column[i])) wsumQ[g[i]-1] += wg[i]; // && nisnan(wg[i]) for(int i = ng; i--; ) { if(isnan2(wsumQ[i])) stop("Missing weights in order statistics are currently only supported if x is also missing"); wsumQ[i] *= Q; } } else { for(int i = 0; i != lx1; ++i) if(isnan2(column[i])) wsum[g[i]-1] = DBL_MAX; // OK ?? // needed?? for(int i = ng; i--; ) if(isnan2(wsumQ[i]) && wsum[i] != DBL_MAX) stop("Missing weights in order statistics are currently only supported if x is also missing"); } if(tiesmean) { std::vector seen(ng); std::vector n(ng, 1); // only needed for 0 weights. Could check above if there are any. for(int i = 0; i != lx1; ++i) { oi = o[i]-1; gi = g[oi]-1; if(seen[gi]) continue; if(wsum[gi] < wsumQ[gi]) nthj[gi] = column[oi]; else { if(wsum[gi] > wsumQ[gi]) { seen[gi] = true; continue; } nthj[gi] += (column[oi]-nthj[gi])/++n[gi]; } wsum[gi] += wg[oi]; } } else if(lower) { for(int i = 0; i != lx1; ++i) { oi = o[i]-1; gi = g[oi]-1; if(wsum[gi] < wsumQ[gi]) { wsum[gi] += wg[oi]; nthj[gi] = column[oi]; } } } else { for(int i = 0; i != lx1; ++i) { oi = o[i]-1; gi = g[oi]-1; if(wsum[gi] <= wsumQ[gi]) { wsum[gi] += wg[oi]; nthj[gi] = column[oi]; } } } SHALLOW_DUPLICATE_ATTRIB(nthj, column); out[j] = nthj; } SHALLOW_DUPLICATE_ATTRIB(out, x); Rf_setAttrib(out, R_RowNamesSymbol, IntegerVector::create(NA_INTEGER, -ng)); return out; } } } collapse/src/RcppExports.cpp0000644000176200001440000012622414174223711015651 0ustar liggesusers// Generated by using Rcpp::compileAttributes() -> do not edit by hand // Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 #include using namespace Rcpp; #ifdef RCPP_USE_GLOBAL_ROSTREAM Rcpp::Rostream& Rcpp::Rcout = Rcpp::Rcpp_cout_get(); Rcpp::Rostream& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get(); #endif // TRACpp SEXP TRACpp(const SEXP& x, const SEXP& xAG, const IntegerVector& g, int ret); RcppExport SEXP _collapse_TRACpp(SEXP xSEXP, SEXP xAGSEXP, SEXP gSEXP, SEXP retSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< const SEXP& >::type x(xSEXP); Rcpp::traits::input_parameter< const SEXP& >::type xAG(xAGSEXP); Rcpp::traits::input_parameter< const IntegerVector& >::type g(gSEXP); Rcpp::traits::input_parameter< int >::type ret(retSEXP); rcpp_result_gen = Rcpp::wrap(TRACpp(x, xAG, g, ret)); return rcpp_result_gen; END_RCPP } // TRAlCpp List TRAlCpp(const List& x, const SEXP& xAG, const IntegerVector& g, int ret); RcppExport SEXP _collapse_TRAlCpp(SEXP xSEXP, SEXP xAGSEXP, SEXP gSEXP, SEXP retSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< const List& >::type x(xSEXP); Rcpp::traits::input_parameter< const SEXP& >::type xAG(xAGSEXP); Rcpp::traits::input_parameter< const IntegerVector& >::type g(gSEXP); Rcpp::traits::input_parameter< int >::type ret(retSEXP); rcpp_result_gen = Rcpp::wrap(TRAlCpp(x, xAG, g, ret)); return rcpp_result_gen; END_RCPP } // TRAmCpp SEXP TRAmCpp(const SEXP& x, const SEXP& xAG, const IntegerVector& g, int ret); RcppExport SEXP _collapse_TRAmCpp(SEXP xSEXP, SEXP xAGSEXP, SEXP gSEXP, SEXP retSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< const SEXP& >::type x(xSEXP); Rcpp::traits::input_parameter< const SEXP& >::type xAG(xAGSEXP); Rcpp::traits::input_parameter< const IntegerVector& >::type g(gSEXP); Rcpp::traits::input_parameter< int >::type ret(retSEXP); rcpp_result_gen = Rcpp::wrap(TRAmCpp(x, xAG, g, ret)); return rcpp_result_gen; END_RCPP } // fndistinctCpp SEXP fndistinctCpp(const SEXP& x, int ng, const IntegerVector& g, const SEXP& gs, bool narm); RcppExport SEXP _collapse_fndistinctCpp(SEXP xSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP gsSEXP, SEXP narmSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< const SEXP& >::type x(xSEXP); Rcpp::traits::input_parameter< int >::type ng(ngSEXP); Rcpp::traits::input_parameter< const IntegerVector& >::type g(gSEXP); Rcpp::traits::input_parameter< const SEXP& >::type gs(gsSEXP); Rcpp::traits::input_parameter< bool >::type narm(narmSEXP); rcpp_result_gen = Rcpp::wrap(fndistinctCpp(x, ng, g, gs, narm)); return rcpp_result_gen; END_RCPP } // fndistinctlCpp SEXP fndistinctlCpp(const List& x, int ng, const IntegerVector& g, const SEXP& gs, bool narm, bool drop); RcppExport SEXP _collapse_fndistinctlCpp(SEXP xSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP gsSEXP, SEXP narmSEXP, SEXP dropSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< const List& >::type x(xSEXP); Rcpp::traits::input_parameter< int >::type ng(ngSEXP); Rcpp::traits::input_parameter< const IntegerVector& >::type g(gSEXP); Rcpp::traits::input_parameter< const SEXP& >::type gs(gsSEXP); Rcpp::traits::input_parameter< bool >::type narm(narmSEXP); Rcpp::traits::input_parameter< bool >::type drop(dropSEXP); rcpp_result_gen = Rcpp::wrap(fndistinctlCpp(x, ng, g, gs, narm, drop)); return rcpp_result_gen; END_RCPP } // fndistinctmCpp SEXP fndistinctmCpp(SEXP x, int ng, IntegerVector g, SEXP gs, bool narm, bool drop); RcppExport SEXP _collapse_fndistinctmCpp(SEXP xSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP gsSEXP, SEXP narmSEXP, SEXP dropSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< SEXP >::type x(xSEXP); Rcpp::traits::input_parameter< int >::type ng(ngSEXP); Rcpp::traits::input_parameter< IntegerVector >::type g(gSEXP); Rcpp::traits::input_parameter< SEXP >::type gs(gsSEXP); Rcpp::traits::input_parameter< bool >::type narm(narmSEXP); Rcpp::traits::input_parameter< bool >::type drop(dropSEXP); rcpp_result_gen = Rcpp::wrap(fndistinctmCpp(x, ng, g, gs, narm, drop)); return rcpp_result_gen; END_RCPP } // BWCpp NumericVector BWCpp(const NumericVector& x, int ng, const IntegerVector& g, const SEXP& gs, const SEXP& w, bool narm, double theta, double set_mean, bool B, bool fill); RcppExport SEXP _collapse_BWCpp(SEXP xSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP gsSEXP, SEXP wSEXP, SEXP narmSEXP, SEXP thetaSEXP, SEXP set_meanSEXP, SEXP BSEXP, SEXP fillSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< const NumericVector& >::type x(xSEXP); Rcpp::traits::input_parameter< int >::type ng(ngSEXP); Rcpp::traits::input_parameter< const IntegerVector& >::type g(gSEXP); Rcpp::traits::input_parameter< const SEXP& >::type gs(gsSEXP); Rcpp::traits::input_parameter< const SEXP& >::type w(wSEXP); Rcpp::traits::input_parameter< bool >::type narm(narmSEXP); Rcpp::traits::input_parameter< double >::type theta(thetaSEXP); Rcpp::traits::input_parameter< double >::type set_mean(set_meanSEXP); Rcpp::traits::input_parameter< bool >::type B(BSEXP); Rcpp::traits::input_parameter< bool >::type fill(fillSEXP); rcpp_result_gen = Rcpp::wrap(BWCpp(x, ng, g, gs, w, narm, theta, set_mean, B, fill)); return rcpp_result_gen; END_RCPP } // BWmCpp NumericMatrix BWmCpp(const NumericMatrix& x, int ng, const IntegerVector& g, const SEXP& gs, const SEXP& w, bool narm, double theta, double set_mean, bool B, bool fill); RcppExport SEXP _collapse_BWmCpp(SEXP xSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP gsSEXP, SEXP wSEXP, SEXP narmSEXP, SEXP thetaSEXP, SEXP set_meanSEXP, SEXP BSEXP, SEXP fillSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< const NumericMatrix& >::type x(xSEXP); Rcpp::traits::input_parameter< int >::type ng(ngSEXP); Rcpp::traits::input_parameter< const IntegerVector& >::type g(gSEXP); Rcpp::traits::input_parameter< const SEXP& >::type gs(gsSEXP); Rcpp::traits::input_parameter< const SEXP& >::type w(wSEXP); Rcpp::traits::input_parameter< bool >::type narm(narmSEXP); Rcpp::traits::input_parameter< double >::type theta(thetaSEXP); Rcpp::traits::input_parameter< double >::type set_mean(set_meanSEXP); Rcpp::traits::input_parameter< bool >::type B(BSEXP); Rcpp::traits::input_parameter< bool >::type fill(fillSEXP); rcpp_result_gen = Rcpp::wrap(BWmCpp(x, ng, g, gs, w, narm, theta, set_mean, B, fill)); return rcpp_result_gen; END_RCPP } // BWlCpp List BWlCpp(const List& x, int ng, const IntegerVector& g, const SEXP& gs, const SEXP& w, bool narm, double theta, double set_mean, bool B, bool fill); RcppExport SEXP _collapse_BWlCpp(SEXP xSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP gsSEXP, SEXP wSEXP, SEXP narmSEXP, SEXP thetaSEXP, SEXP set_meanSEXP, SEXP BSEXP, SEXP fillSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< const List& >::type x(xSEXP); Rcpp::traits::input_parameter< int >::type ng(ngSEXP); Rcpp::traits::input_parameter< const IntegerVector& >::type g(gSEXP); Rcpp::traits::input_parameter< const SEXP& >::type gs(gsSEXP); Rcpp::traits::input_parameter< const SEXP& >::type w(wSEXP); Rcpp::traits::input_parameter< bool >::type narm(narmSEXP); Rcpp::traits::input_parameter< double >::type theta(thetaSEXP); Rcpp::traits::input_parameter< double >::type set_mean(set_meanSEXP); Rcpp::traits::input_parameter< bool >::type B(BSEXP); Rcpp::traits::input_parameter< bool >::type fill(fillSEXP); rcpp_result_gen = Rcpp::wrap(BWlCpp(x, ng, g, gs, w, narm, theta, set_mean, B, fill)); return rcpp_result_gen; END_RCPP } // fbstatsCpp SEXP fbstatsCpp(const NumericVector& x, bool ext, int ng, const IntegerVector& g, int npg, const IntegerVector& pg, const SEXP& w, bool stable_algo, bool array, bool setn, const SEXP& gn); RcppExport SEXP _collapse_fbstatsCpp(SEXP xSEXP, SEXP extSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP npgSEXP, SEXP pgSEXP, SEXP wSEXP, SEXP stable_algoSEXP, SEXP arraySEXP, SEXP setnSEXP, SEXP gnSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< const NumericVector& >::type x(xSEXP); Rcpp::traits::input_parameter< bool >::type ext(extSEXP); Rcpp::traits::input_parameter< int >::type ng(ngSEXP); Rcpp::traits::input_parameter< const IntegerVector& >::type g(gSEXP); Rcpp::traits::input_parameter< int >::type npg(npgSEXP); Rcpp::traits::input_parameter< const IntegerVector& >::type pg(pgSEXP); Rcpp::traits::input_parameter< const SEXP& >::type w(wSEXP); Rcpp::traits::input_parameter< bool >::type stable_algo(stable_algoSEXP); Rcpp::traits::input_parameter< bool >::type array(arraySEXP); Rcpp::traits::input_parameter< bool >::type setn(setnSEXP); Rcpp::traits::input_parameter< const SEXP& >::type gn(gnSEXP); rcpp_result_gen = Rcpp::wrap(fbstatsCpp(x, ext, ng, g, npg, pg, w, stable_algo, array, setn, gn)); return rcpp_result_gen; END_RCPP } // fbstatsmCpp SEXP fbstatsmCpp(const NumericMatrix& x, bool ext, int ng, const IntegerVector& g, int npg, const IntegerVector& pg, const SEXP& w, bool stable_algo, bool array, const SEXP& gn); RcppExport SEXP _collapse_fbstatsmCpp(SEXP xSEXP, SEXP extSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP npgSEXP, SEXP pgSEXP, SEXP wSEXP, SEXP stable_algoSEXP, SEXP arraySEXP, SEXP gnSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< const NumericMatrix& >::type x(xSEXP); Rcpp::traits::input_parameter< bool >::type ext(extSEXP); Rcpp::traits::input_parameter< int >::type ng(ngSEXP); Rcpp::traits::input_parameter< const IntegerVector& >::type g(gSEXP); Rcpp::traits::input_parameter< int >::type npg(npgSEXP); Rcpp::traits::input_parameter< const IntegerVector& >::type pg(pgSEXP); Rcpp::traits::input_parameter< const SEXP& >::type w(wSEXP); Rcpp::traits::input_parameter< bool >::type stable_algo(stable_algoSEXP); Rcpp::traits::input_parameter< bool >::type array(arraySEXP); Rcpp::traits::input_parameter< const SEXP& >::type gn(gnSEXP); rcpp_result_gen = Rcpp::wrap(fbstatsmCpp(x, ext, ng, g, npg, pg, w, stable_algo, array, gn)); return rcpp_result_gen; END_RCPP } // fbstatslCpp SEXP fbstatslCpp(const List& x, bool ext, int ng, const IntegerVector& g, int npg, const IntegerVector& pg, const SEXP& w, bool stable_algo, bool array, const SEXP& gn); RcppExport SEXP _collapse_fbstatslCpp(SEXP xSEXP, SEXP extSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP npgSEXP, SEXP pgSEXP, SEXP wSEXP, SEXP stable_algoSEXP, SEXP arraySEXP, SEXP gnSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< const List& >::type x(xSEXP); Rcpp::traits::input_parameter< bool >::type ext(extSEXP); Rcpp::traits::input_parameter< int >::type ng(ngSEXP); Rcpp::traits::input_parameter< const IntegerVector& >::type g(gSEXP); Rcpp::traits::input_parameter< int >::type npg(npgSEXP); Rcpp::traits::input_parameter< const IntegerVector& >::type pg(pgSEXP); Rcpp::traits::input_parameter< const SEXP& >::type w(wSEXP); Rcpp::traits::input_parameter< bool >::type stable_algo(stable_algoSEXP); Rcpp::traits::input_parameter< bool >::type array(arraySEXP); Rcpp::traits::input_parameter< const SEXP& >::type gn(gnSEXP); rcpp_result_gen = Rcpp::wrap(fbstatslCpp(x, ext, ng, g, npg, pg, w, stable_algo, array, gn)); return rcpp_result_gen; END_RCPP } // fdiffgrowthCpp NumericVector fdiffgrowthCpp(const NumericVector& x, const IntegerVector& n, const IntegerVector& diff, double fill, int ng, const IntegerVector& g, const SEXP& gs, const SEXP& t, int ret, double rho, bool names, double power); RcppExport SEXP _collapse_fdiffgrowthCpp(SEXP xSEXP, SEXP nSEXP, SEXP diffSEXP, SEXP fillSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP gsSEXP, SEXP tSEXP, SEXP retSEXP, SEXP rhoSEXP, SEXP namesSEXP, SEXP powerSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< const NumericVector& >::type x(xSEXP); Rcpp::traits::input_parameter< const IntegerVector& >::type n(nSEXP); Rcpp::traits::input_parameter< const IntegerVector& >::type diff(diffSEXP); Rcpp::traits::input_parameter< double >::type fill(fillSEXP); Rcpp::traits::input_parameter< int >::type ng(ngSEXP); Rcpp::traits::input_parameter< const IntegerVector& >::type g(gSEXP); Rcpp::traits::input_parameter< const SEXP& >::type gs(gsSEXP); Rcpp::traits::input_parameter< const SEXP& >::type t(tSEXP); Rcpp::traits::input_parameter< int >::type ret(retSEXP); Rcpp::traits::input_parameter< double >::type rho(rhoSEXP); Rcpp::traits::input_parameter< bool >::type names(namesSEXP); Rcpp::traits::input_parameter< double >::type power(powerSEXP); rcpp_result_gen = Rcpp::wrap(fdiffgrowthCpp(x, n, diff, fill, ng, g, gs, t, ret, rho, names, power)); return rcpp_result_gen; END_RCPP } // fdiffgrowthmCpp NumericMatrix fdiffgrowthmCpp(const NumericMatrix& x, const IntegerVector& n, const IntegerVector& diff, double fill, int ng, const IntegerVector& g, const SEXP& gs, const SEXP& t, int ret, double rho, bool names, double power); RcppExport SEXP _collapse_fdiffgrowthmCpp(SEXP xSEXP, SEXP nSEXP, SEXP diffSEXP, SEXP fillSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP gsSEXP, SEXP tSEXP, SEXP retSEXP, SEXP rhoSEXP, SEXP namesSEXP, SEXP powerSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< const NumericMatrix& >::type x(xSEXP); Rcpp::traits::input_parameter< const IntegerVector& >::type n(nSEXP); Rcpp::traits::input_parameter< const IntegerVector& >::type diff(diffSEXP); Rcpp::traits::input_parameter< double >::type fill(fillSEXP); Rcpp::traits::input_parameter< int >::type ng(ngSEXP); Rcpp::traits::input_parameter< const IntegerVector& >::type g(gSEXP); Rcpp::traits::input_parameter< const SEXP& >::type gs(gsSEXP); Rcpp::traits::input_parameter< const SEXP& >::type t(tSEXP); Rcpp::traits::input_parameter< int >::type ret(retSEXP); Rcpp::traits::input_parameter< double >::type rho(rhoSEXP); Rcpp::traits::input_parameter< bool >::type names(namesSEXP); Rcpp::traits::input_parameter< double >::type power(powerSEXP); rcpp_result_gen = Rcpp::wrap(fdiffgrowthmCpp(x, n, diff, fill, ng, g, gs, t, ret, rho, names, power)); return rcpp_result_gen; END_RCPP } // fdiffgrowthlCpp List fdiffgrowthlCpp(const List& x, const IntegerVector& n, const IntegerVector& diff, double fill, int ng, const IntegerVector& g, const SEXP& gs, const SEXP& t, int ret, double rho, bool names, double power); RcppExport SEXP _collapse_fdiffgrowthlCpp(SEXP xSEXP, SEXP nSEXP, SEXP diffSEXP, SEXP fillSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP gsSEXP, SEXP tSEXP, SEXP retSEXP, SEXP rhoSEXP, SEXP namesSEXP, SEXP powerSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< const List& >::type x(xSEXP); Rcpp::traits::input_parameter< const IntegerVector& >::type n(nSEXP); Rcpp::traits::input_parameter< const IntegerVector& >::type diff(diffSEXP); Rcpp::traits::input_parameter< double >::type fill(fillSEXP); Rcpp::traits::input_parameter< int >::type ng(ngSEXP); Rcpp::traits::input_parameter< const IntegerVector& >::type g(gSEXP); Rcpp::traits::input_parameter< const SEXP& >::type gs(gsSEXP); Rcpp::traits::input_parameter< const SEXP& >::type t(tSEXP); Rcpp::traits::input_parameter< int >::type ret(retSEXP); Rcpp::traits::input_parameter< double >::type rho(rhoSEXP); Rcpp::traits::input_parameter< bool >::type names(namesSEXP); Rcpp::traits::input_parameter< double >::type power(powerSEXP); rcpp_result_gen = Rcpp::wrap(fdiffgrowthlCpp(x, n, diff, fill, ng, g, gs, t, ret, rho, names, power)); return rcpp_result_gen; END_RCPP } // flagleadCpp SEXP flagleadCpp(SEXP x, IntegerVector n, SEXP fill, int ng, IntegerVector g, SEXP t, bool names); RcppExport SEXP _collapse_flagleadCpp(SEXP xSEXP, SEXP nSEXP, SEXP fillSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP tSEXP, SEXP namesSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< SEXP >::type x(xSEXP); Rcpp::traits::input_parameter< IntegerVector >::type n(nSEXP); Rcpp::traits::input_parameter< SEXP >::type fill(fillSEXP); Rcpp::traits::input_parameter< int >::type ng(ngSEXP); Rcpp::traits::input_parameter< IntegerVector >::type g(gSEXP); Rcpp::traits::input_parameter< SEXP >::type t(tSEXP); Rcpp::traits::input_parameter< bool >::type names(namesSEXP); rcpp_result_gen = Rcpp::wrap(flagleadCpp(x, n, fill, ng, g, t, names)); return rcpp_result_gen; END_RCPP } // flagleadmCpp SEXP flagleadmCpp(SEXP x, IntegerVector n, SEXP fill, int ng, IntegerVector g, SEXP t, bool names); RcppExport SEXP _collapse_flagleadmCpp(SEXP xSEXP, SEXP nSEXP, SEXP fillSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP tSEXP, SEXP namesSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< SEXP >::type x(xSEXP); Rcpp::traits::input_parameter< IntegerVector >::type n(nSEXP); Rcpp::traits::input_parameter< SEXP >::type fill(fillSEXP); Rcpp::traits::input_parameter< int >::type ng(ngSEXP); Rcpp::traits::input_parameter< IntegerVector >::type g(gSEXP); Rcpp::traits::input_parameter< SEXP >::type t(tSEXP); Rcpp::traits::input_parameter< bool >::type names(namesSEXP); rcpp_result_gen = Rcpp::wrap(flagleadmCpp(x, n, fill, ng, g, t, names)); return rcpp_result_gen; END_RCPP } // flagleadlCpp List flagleadlCpp(const List& x, const IntegerVector& n, const SEXP& fill, int ng, const IntegerVector& g, const SEXP& t, bool names); RcppExport SEXP _collapse_flagleadlCpp(SEXP xSEXP, SEXP nSEXP, SEXP fillSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP tSEXP, SEXP namesSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< const List& >::type x(xSEXP); Rcpp::traits::input_parameter< const IntegerVector& >::type n(nSEXP); Rcpp::traits::input_parameter< const SEXP& >::type fill(fillSEXP); Rcpp::traits::input_parameter< int >::type ng(ngSEXP); Rcpp::traits::input_parameter< const IntegerVector& >::type g(gSEXP); Rcpp::traits::input_parameter< const SEXP& >::type t(tSEXP); Rcpp::traits::input_parameter< bool >::type names(namesSEXP); rcpp_result_gen = Rcpp::wrap(flagleadlCpp(x, n, fill, ng, g, t, names)); return rcpp_result_gen; END_RCPP } // fmeanCpp NumericVector fmeanCpp(const NumericVector& x, int ng, const IntegerVector& g, const SEXP& gs, const SEXP& w, bool narm); RcppExport SEXP _collapse_fmeanCpp(SEXP xSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP gsSEXP, SEXP wSEXP, SEXP narmSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< const NumericVector& >::type x(xSEXP); Rcpp::traits::input_parameter< int >::type ng(ngSEXP); Rcpp::traits::input_parameter< const IntegerVector& >::type g(gSEXP); Rcpp::traits::input_parameter< const SEXP& >::type gs(gsSEXP); Rcpp::traits::input_parameter< const SEXP& >::type w(wSEXP); Rcpp::traits::input_parameter< bool >::type narm(narmSEXP); rcpp_result_gen = Rcpp::wrap(fmeanCpp(x, ng, g, gs, w, narm)); return rcpp_result_gen; END_RCPP } // fmeanmCpp SEXP fmeanmCpp(const NumericMatrix& x, int ng, const IntegerVector& g, const SEXP& gs, const SEXP& w, bool narm, bool drop); RcppExport SEXP _collapse_fmeanmCpp(SEXP xSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP gsSEXP, SEXP wSEXP, SEXP narmSEXP, SEXP dropSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< const NumericMatrix& >::type x(xSEXP); Rcpp::traits::input_parameter< int >::type ng(ngSEXP); Rcpp::traits::input_parameter< const IntegerVector& >::type g(gSEXP); Rcpp::traits::input_parameter< const SEXP& >::type gs(gsSEXP); Rcpp::traits::input_parameter< const SEXP& >::type w(wSEXP); Rcpp::traits::input_parameter< bool >::type narm(narmSEXP); Rcpp::traits::input_parameter< bool >::type drop(dropSEXP); rcpp_result_gen = Rcpp::wrap(fmeanmCpp(x, ng, g, gs, w, narm, drop)); return rcpp_result_gen; END_RCPP } // fmeanlCpp SEXP fmeanlCpp(const List& x, int ng, const IntegerVector& g, const SEXP& gs, const SEXP& w, bool narm, bool drop); RcppExport SEXP _collapse_fmeanlCpp(SEXP xSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP gsSEXP, SEXP wSEXP, SEXP narmSEXP, SEXP dropSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< const List& >::type x(xSEXP); Rcpp::traits::input_parameter< int >::type ng(ngSEXP); Rcpp::traits::input_parameter< const IntegerVector& >::type g(gSEXP); Rcpp::traits::input_parameter< const SEXP& >::type gs(gsSEXP); Rcpp::traits::input_parameter< const SEXP& >::type w(wSEXP); Rcpp::traits::input_parameter< bool >::type narm(narmSEXP); Rcpp::traits::input_parameter< bool >::type drop(dropSEXP); rcpp_result_gen = Rcpp::wrap(fmeanlCpp(x, ng, g, gs, w, narm, drop)); return rcpp_result_gen; END_RCPP } // fmodeCpp SEXP fmodeCpp(const SEXP& x, int ng, const IntegerVector& g, const SEXP& gs, const SEXP& w, bool narm, int ret); RcppExport SEXP _collapse_fmodeCpp(SEXP xSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP gsSEXP, SEXP wSEXP, SEXP narmSEXP, SEXP retSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< const SEXP& >::type x(xSEXP); Rcpp::traits::input_parameter< int >::type ng(ngSEXP); Rcpp::traits::input_parameter< const IntegerVector& >::type g(gSEXP); Rcpp::traits::input_parameter< const SEXP& >::type gs(gsSEXP); Rcpp::traits::input_parameter< const SEXP& >::type w(wSEXP); Rcpp::traits::input_parameter< bool >::type narm(narmSEXP); Rcpp::traits::input_parameter< int >::type ret(retSEXP); rcpp_result_gen = Rcpp::wrap(fmodeCpp(x, ng, g, gs, w, narm, ret)); return rcpp_result_gen; END_RCPP } // fmodelCpp SEXP fmodelCpp(const List& x, int ng, const IntegerVector& g, const SEXP& gs, const SEXP& w, bool narm, int ret); RcppExport SEXP _collapse_fmodelCpp(SEXP xSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP gsSEXP, SEXP wSEXP, SEXP narmSEXP, SEXP retSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< const List& >::type x(xSEXP); Rcpp::traits::input_parameter< int >::type ng(ngSEXP); Rcpp::traits::input_parameter< const IntegerVector& >::type g(gSEXP); Rcpp::traits::input_parameter< const SEXP& >::type gs(gsSEXP); Rcpp::traits::input_parameter< const SEXP& >::type w(wSEXP); Rcpp::traits::input_parameter< bool >::type narm(narmSEXP); Rcpp::traits::input_parameter< int >::type ret(retSEXP); rcpp_result_gen = Rcpp::wrap(fmodelCpp(x, ng, g, gs, w, narm, ret)); return rcpp_result_gen; END_RCPP } // fmodemCpp SEXP fmodemCpp(SEXP x, int ng, IntegerVector g, SEXP gs, SEXP w, bool narm, bool drop, int ret); RcppExport SEXP _collapse_fmodemCpp(SEXP xSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP gsSEXP, SEXP wSEXP, SEXP narmSEXP, SEXP dropSEXP, SEXP retSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< SEXP >::type x(xSEXP); Rcpp::traits::input_parameter< int >::type ng(ngSEXP); Rcpp::traits::input_parameter< IntegerVector >::type g(gSEXP); Rcpp::traits::input_parameter< SEXP >::type gs(gsSEXP); Rcpp::traits::input_parameter< SEXP >::type w(wSEXP); Rcpp::traits::input_parameter< bool >::type narm(narmSEXP); Rcpp::traits::input_parameter< bool >::type drop(dropSEXP); Rcpp::traits::input_parameter< int >::type ret(retSEXP); rcpp_result_gen = Rcpp::wrap(fmodemCpp(x, ng, g, gs, w, narm, drop, ret)); return rcpp_result_gen; END_RCPP } // fnthCpp NumericVector fnthCpp(const NumericVector& x, double Q, int ng, const IntegerVector& g, const SEXP& gs, const SEXP& w, bool narm, int ret); RcppExport SEXP _collapse_fnthCpp(SEXP xSEXP, SEXP QSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP gsSEXP, SEXP wSEXP, SEXP narmSEXP, SEXP retSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< const NumericVector& >::type x(xSEXP); Rcpp::traits::input_parameter< double >::type Q(QSEXP); Rcpp::traits::input_parameter< int >::type ng(ngSEXP); Rcpp::traits::input_parameter< const IntegerVector& >::type g(gSEXP); Rcpp::traits::input_parameter< const SEXP& >::type gs(gsSEXP); Rcpp::traits::input_parameter< const SEXP& >::type w(wSEXP); Rcpp::traits::input_parameter< bool >::type narm(narmSEXP); Rcpp::traits::input_parameter< int >::type ret(retSEXP); rcpp_result_gen = Rcpp::wrap(fnthCpp(x, Q, ng, g, gs, w, narm, ret)); return rcpp_result_gen; END_RCPP } // fnthmCpp SEXP fnthmCpp(const NumericMatrix& x, double Q, int ng, const IntegerVector& g, const SEXP& gs, const SEXP& w, bool narm, bool drop, int ret); RcppExport SEXP _collapse_fnthmCpp(SEXP xSEXP, SEXP QSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP gsSEXP, SEXP wSEXP, SEXP narmSEXP, SEXP dropSEXP, SEXP retSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< const NumericMatrix& >::type x(xSEXP); Rcpp::traits::input_parameter< double >::type Q(QSEXP); Rcpp::traits::input_parameter< int >::type ng(ngSEXP); Rcpp::traits::input_parameter< const IntegerVector& >::type g(gSEXP); Rcpp::traits::input_parameter< const SEXP& >::type gs(gsSEXP); Rcpp::traits::input_parameter< const SEXP& >::type w(wSEXP); Rcpp::traits::input_parameter< bool >::type narm(narmSEXP); Rcpp::traits::input_parameter< bool >::type drop(dropSEXP); Rcpp::traits::input_parameter< int >::type ret(retSEXP); rcpp_result_gen = Rcpp::wrap(fnthmCpp(x, Q, ng, g, gs, w, narm, drop, ret)); return rcpp_result_gen; END_RCPP } // fnthlCpp SEXP fnthlCpp(const List& x, double Q, int ng, const IntegerVector& g, const SEXP& gs, const SEXP& w, bool narm, bool drop, int ret); RcppExport SEXP _collapse_fnthlCpp(SEXP xSEXP, SEXP QSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP gsSEXP, SEXP wSEXP, SEXP narmSEXP, SEXP dropSEXP, SEXP retSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< const List& >::type x(xSEXP); Rcpp::traits::input_parameter< double >::type Q(QSEXP); Rcpp::traits::input_parameter< int >::type ng(ngSEXP); Rcpp::traits::input_parameter< const IntegerVector& >::type g(gSEXP); Rcpp::traits::input_parameter< const SEXP& >::type gs(gsSEXP); Rcpp::traits::input_parameter< const SEXP& >::type w(wSEXP); Rcpp::traits::input_parameter< bool >::type narm(narmSEXP); Rcpp::traits::input_parameter< bool >::type drop(dropSEXP); Rcpp::traits::input_parameter< int >::type ret(retSEXP); rcpp_result_gen = Rcpp::wrap(fnthlCpp(x, Q, ng, g, gs, w, narm, drop, ret)); return rcpp_result_gen; END_RCPP } // fprodCpp NumericVector fprodCpp(const NumericVector& x, int ng, const IntegerVector& g, const SEXP& w, bool narm); RcppExport SEXP _collapse_fprodCpp(SEXP xSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP wSEXP, SEXP narmSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< const NumericVector& >::type x(xSEXP); Rcpp::traits::input_parameter< int >::type ng(ngSEXP); Rcpp::traits::input_parameter< const IntegerVector& >::type g(gSEXP); Rcpp::traits::input_parameter< const SEXP& >::type w(wSEXP); Rcpp::traits::input_parameter< bool >::type narm(narmSEXP); rcpp_result_gen = Rcpp::wrap(fprodCpp(x, ng, g, w, narm)); return rcpp_result_gen; END_RCPP } // fprodmCpp SEXP fprodmCpp(const NumericMatrix& x, int ng, const IntegerVector& g, const SEXP& w, bool narm, bool drop); RcppExport SEXP _collapse_fprodmCpp(SEXP xSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP wSEXP, SEXP narmSEXP, SEXP dropSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< const NumericMatrix& >::type x(xSEXP); Rcpp::traits::input_parameter< int >::type ng(ngSEXP); Rcpp::traits::input_parameter< const IntegerVector& >::type g(gSEXP); Rcpp::traits::input_parameter< const SEXP& >::type w(wSEXP); Rcpp::traits::input_parameter< bool >::type narm(narmSEXP); Rcpp::traits::input_parameter< bool >::type drop(dropSEXP); rcpp_result_gen = Rcpp::wrap(fprodmCpp(x, ng, g, w, narm, drop)); return rcpp_result_gen; END_RCPP } // fprodlCpp SEXP fprodlCpp(const List& x, int ng, const IntegerVector& g, const SEXP& w, bool narm, bool drop); RcppExport SEXP _collapse_fprodlCpp(SEXP xSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP wSEXP, SEXP narmSEXP, SEXP dropSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< const List& >::type x(xSEXP); Rcpp::traits::input_parameter< int >::type ng(ngSEXP); Rcpp::traits::input_parameter< const IntegerVector& >::type g(gSEXP); Rcpp::traits::input_parameter< const SEXP& >::type w(wSEXP); Rcpp::traits::input_parameter< bool >::type narm(narmSEXP); Rcpp::traits::input_parameter< bool >::type drop(dropSEXP); rcpp_result_gen = Rcpp::wrap(fprodlCpp(x, ng, g, w, narm, drop)); return rcpp_result_gen; END_RCPP } // fscaleCpp NumericVector fscaleCpp(const NumericVector& x, int ng, const IntegerVector& g, const SEXP& w, bool narm, double set_mean, double set_sd); RcppExport SEXP _collapse_fscaleCpp(SEXP xSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP wSEXP, SEXP narmSEXP, SEXP set_meanSEXP, SEXP set_sdSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< const NumericVector& >::type x(xSEXP); Rcpp::traits::input_parameter< int >::type ng(ngSEXP); Rcpp::traits::input_parameter< const IntegerVector& >::type g(gSEXP); Rcpp::traits::input_parameter< const SEXP& >::type w(wSEXP); Rcpp::traits::input_parameter< bool >::type narm(narmSEXP); Rcpp::traits::input_parameter< double >::type set_mean(set_meanSEXP); Rcpp::traits::input_parameter< double >::type set_sd(set_sdSEXP); rcpp_result_gen = Rcpp::wrap(fscaleCpp(x, ng, g, w, narm, set_mean, set_sd)); return rcpp_result_gen; END_RCPP } // fscalemCpp NumericMatrix fscalemCpp(const NumericMatrix& x, int ng, const IntegerVector& g, const SEXP& w, bool narm, double set_mean, double set_sd); RcppExport SEXP _collapse_fscalemCpp(SEXP xSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP wSEXP, SEXP narmSEXP, SEXP set_meanSEXP, SEXP set_sdSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< const NumericMatrix& >::type x(xSEXP); Rcpp::traits::input_parameter< int >::type ng(ngSEXP); Rcpp::traits::input_parameter< const IntegerVector& >::type g(gSEXP); Rcpp::traits::input_parameter< const SEXP& >::type w(wSEXP); Rcpp::traits::input_parameter< bool >::type narm(narmSEXP); Rcpp::traits::input_parameter< double >::type set_mean(set_meanSEXP); Rcpp::traits::input_parameter< double >::type set_sd(set_sdSEXP); rcpp_result_gen = Rcpp::wrap(fscalemCpp(x, ng, g, w, narm, set_mean, set_sd)); return rcpp_result_gen; END_RCPP } // fscalelCpp List fscalelCpp(const List& x, int ng, const IntegerVector& g, const SEXP& w, bool narm, double set_mean, double set_sd); RcppExport SEXP _collapse_fscalelCpp(SEXP xSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP wSEXP, SEXP narmSEXP, SEXP set_meanSEXP, SEXP set_sdSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< const List& >::type x(xSEXP); Rcpp::traits::input_parameter< int >::type ng(ngSEXP); Rcpp::traits::input_parameter< const IntegerVector& >::type g(gSEXP); Rcpp::traits::input_parameter< const SEXP& >::type w(wSEXP); Rcpp::traits::input_parameter< bool >::type narm(narmSEXP); Rcpp::traits::input_parameter< double >::type set_mean(set_meanSEXP); Rcpp::traits::input_parameter< double >::type set_sd(set_sdSEXP); rcpp_result_gen = Rcpp::wrap(fscalelCpp(x, ng, g, w, narm, set_mean, set_sd)); return rcpp_result_gen; END_RCPP } // fvarsdCpp NumericVector fvarsdCpp(const NumericVector& x, int ng, const IntegerVector& g, const SEXP& gs, const SEXP& w, bool narm, bool stable_algo, bool sd); RcppExport SEXP _collapse_fvarsdCpp(SEXP xSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP gsSEXP, SEXP wSEXP, SEXP narmSEXP, SEXP stable_algoSEXP, SEXP sdSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< const NumericVector& >::type x(xSEXP); Rcpp::traits::input_parameter< int >::type ng(ngSEXP); Rcpp::traits::input_parameter< const IntegerVector& >::type g(gSEXP); Rcpp::traits::input_parameter< const SEXP& >::type gs(gsSEXP); Rcpp::traits::input_parameter< const SEXP& >::type w(wSEXP); Rcpp::traits::input_parameter< bool >::type narm(narmSEXP); Rcpp::traits::input_parameter< bool >::type stable_algo(stable_algoSEXP); Rcpp::traits::input_parameter< bool >::type sd(sdSEXP); rcpp_result_gen = Rcpp::wrap(fvarsdCpp(x, ng, g, gs, w, narm, stable_algo, sd)); return rcpp_result_gen; END_RCPP } // fvarsdmCpp SEXP fvarsdmCpp(const NumericMatrix& x, int ng, const IntegerVector& g, const SEXP& gs, const SEXP& w, bool narm, bool stable_algo, bool sd, bool drop); RcppExport SEXP _collapse_fvarsdmCpp(SEXP xSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP gsSEXP, SEXP wSEXP, SEXP narmSEXP, SEXP stable_algoSEXP, SEXP sdSEXP, SEXP dropSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< const NumericMatrix& >::type x(xSEXP); Rcpp::traits::input_parameter< int >::type ng(ngSEXP); Rcpp::traits::input_parameter< const IntegerVector& >::type g(gSEXP); Rcpp::traits::input_parameter< const SEXP& >::type gs(gsSEXP); Rcpp::traits::input_parameter< const SEXP& >::type w(wSEXP); Rcpp::traits::input_parameter< bool >::type narm(narmSEXP); Rcpp::traits::input_parameter< bool >::type stable_algo(stable_algoSEXP); Rcpp::traits::input_parameter< bool >::type sd(sdSEXP); Rcpp::traits::input_parameter< bool >::type drop(dropSEXP); rcpp_result_gen = Rcpp::wrap(fvarsdmCpp(x, ng, g, gs, w, narm, stable_algo, sd, drop)); return rcpp_result_gen; END_RCPP } // fvarsdlCpp SEXP fvarsdlCpp(const List& x, int ng, const IntegerVector& g, const SEXP& gs, const SEXP& w, bool narm, bool stable_algo, bool sd, bool drop); RcppExport SEXP _collapse_fvarsdlCpp(SEXP xSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP gsSEXP, SEXP wSEXP, SEXP narmSEXP, SEXP stable_algoSEXP, SEXP sdSEXP, SEXP dropSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< const List& >::type x(xSEXP); Rcpp::traits::input_parameter< int >::type ng(ngSEXP); Rcpp::traits::input_parameter< const IntegerVector& >::type g(gSEXP); Rcpp::traits::input_parameter< const SEXP& >::type gs(gsSEXP); Rcpp::traits::input_parameter< const SEXP& >::type w(wSEXP); Rcpp::traits::input_parameter< bool >::type narm(narmSEXP); Rcpp::traits::input_parameter< bool >::type stable_algo(stable_algoSEXP); Rcpp::traits::input_parameter< bool >::type sd(sdSEXP); Rcpp::traits::input_parameter< bool >::type drop(dropSEXP); rcpp_result_gen = Rcpp::wrap(fvarsdlCpp(x, ng, g, gs, w, narm, stable_algo, sd, drop)); return rcpp_result_gen; END_RCPP } // mrtl SEXP mrtl(const SEXP& X, bool names, int ret); RcppExport SEXP _collapse_mrtl(SEXP XSEXP, SEXP namesSEXP, SEXP retSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< const SEXP& >::type X(XSEXP); Rcpp::traits::input_parameter< bool >::type names(namesSEXP); Rcpp::traits::input_parameter< int >::type ret(retSEXP); rcpp_result_gen = Rcpp::wrap(mrtl(X, names, ret)); return rcpp_result_gen; END_RCPP } // mctl SEXP mctl(const SEXP& X, bool names, int ret); RcppExport SEXP _collapse_mctl(SEXP XSEXP, SEXP namesSEXP, SEXP retSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< const SEXP& >::type X(XSEXP); Rcpp::traits::input_parameter< bool >::type names(namesSEXP); Rcpp::traits::input_parameter< int >::type ret(retSEXP); rcpp_result_gen = Rcpp::wrap(mctl(X, names, ret)); return rcpp_result_gen; END_RCPP } // psmatCpp SEXP psmatCpp(const SEXP& x, const IntegerVector& g, const SEXP& t, bool transpose); RcppExport SEXP _collapse_psmatCpp(SEXP xSEXP, SEXP gSEXP, SEXP tSEXP, SEXP transposeSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< const SEXP& >::type x(xSEXP); Rcpp::traits::input_parameter< const IntegerVector& >::type g(gSEXP); Rcpp::traits::input_parameter< const SEXP& >::type t(tSEXP); Rcpp::traits::input_parameter< bool >::type transpose(transposeSEXP); rcpp_result_gen = Rcpp::wrap(psmatCpp(x, g, t, transpose)); return rcpp_result_gen; END_RCPP } // pwnobsmCpp IntegerMatrix pwnobsmCpp(SEXP x); RcppExport SEXP _collapse_pwnobsmCpp(SEXP xSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< SEXP >::type x(xSEXP); rcpp_result_gen = Rcpp::wrap(pwnobsmCpp(x)); return rcpp_result_gen; END_RCPP } // qFCpp SEXP qFCpp(SEXP x, bool ordered, bool na_exclude, bool keep_attr, int ret); RcppExport SEXP _collapse_qFCpp(SEXP xSEXP, SEXP orderedSEXP, SEXP na_excludeSEXP, SEXP keep_attrSEXP, SEXP retSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< SEXP >::type x(xSEXP); Rcpp::traits::input_parameter< bool >::type ordered(orderedSEXP); Rcpp::traits::input_parameter< bool >::type na_exclude(na_excludeSEXP); Rcpp::traits::input_parameter< bool >::type keep_attr(keep_attrSEXP); Rcpp::traits::input_parameter< int >::type ret(retSEXP); rcpp_result_gen = Rcpp::wrap(qFCpp(x, ordered, na_exclude, keep_attr, ret)); return rcpp_result_gen; END_RCPP } // funiqueCpp SEXP funiqueCpp(SEXP x, bool sort); RcppExport SEXP _collapse_funiqueCpp(SEXP xSEXP, SEXP sortSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< SEXP >::type x(xSEXP); Rcpp::traits::input_parameter< bool >::type sort(sortSEXP); rcpp_result_gen = Rcpp::wrap(funiqueCpp(x, sort)); return rcpp_result_gen; END_RCPP } // fdroplevelsCpp IntegerVector fdroplevelsCpp(const IntegerVector& x, bool check_NA); RcppExport SEXP _collapse_fdroplevelsCpp(SEXP xSEXP, SEXP check_NASEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< const IntegerVector& >::type x(xSEXP); Rcpp::traits::input_parameter< bool >::type check_NA(check_NASEXP); rcpp_result_gen = Rcpp::wrap(fdroplevelsCpp(x, check_NA)); return rcpp_result_gen; END_RCPP } // seqid IntegerVector seqid(const IntegerVector& x, const SEXP& o, int del, int start, bool na_skip, bool skip_seq, bool check_o); RcppExport SEXP _collapse_seqid(SEXP xSEXP, SEXP oSEXP, SEXP delSEXP, SEXP startSEXP, SEXP na_skipSEXP, SEXP skip_seqSEXP, SEXP check_oSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< const IntegerVector& >::type x(xSEXP); Rcpp::traits::input_parameter< const SEXP& >::type o(oSEXP); Rcpp::traits::input_parameter< int >::type del(delSEXP); Rcpp::traits::input_parameter< int >::type start(startSEXP); Rcpp::traits::input_parameter< bool >::type na_skip(na_skipSEXP); Rcpp::traits::input_parameter< bool >::type skip_seq(skip_seqSEXP); Rcpp::traits::input_parameter< bool >::type check_o(check_oSEXP); rcpp_result_gen = Rcpp::wrap(seqid(x, o, del, start, na_skip, skip_seq, check_o)); return rcpp_result_gen; END_RCPP } // groupid IntegerVector groupid(const SEXP& x, const SEXP& o, int start, bool na_skip, bool check_o); RcppExport SEXP _collapse_groupid(SEXP xSEXP, SEXP oSEXP, SEXP startSEXP, SEXP na_skipSEXP, SEXP check_oSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< const SEXP& >::type x(xSEXP); Rcpp::traits::input_parameter< const SEXP& >::type o(oSEXP); Rcpp::traits::input_parameter< int >::type start(startSEXP); Rcpp::traits::input_parameter< bool >::type na_skip(na_skipSEXP); Rcpp::traits::input_parameter< bool >::type check_o(check_oSEXP); rcpp_result_gen = Rcpp::wrap(groupid(x, o, start, na_skip, check_o)); return rcpp_result_gen; END_RCPP } // varyingCpp LogicalVector varyingCpp(const SEXP& x, int ng, const IntegerVector& g, bool any_group); RcppExport SEXP _collapse_varyingCpp(SEXP xSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP any_groupSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< const SEXP& >::type x(xSEXP); Rcpp::traits::input_parameter< int >::type ng(ngSEXP); Rcpp::traits::input_parameter< const IntegerVector& >::type g(gSEXP); Rcpp::traits::input_parameter< bool >::type any_group(any_groupSEXP); rcpp_result_gen = Rcpp::wrap(varyingCpp(x, ng, g, any_group)); return rcpp_result_gen; END_RCPP } // varyingmCpp SEXP varyingmCpp(const SEXP& x, int ng, const IntegerVector& g, bool any_group, bool drop); RcppExport SEXP _collapse_varyingmCpp(SEXP xSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP any_groupSEXP, SEXP dropSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< const SEXP& >::type x(xSEXP); Rcpp::traits::input_parameter< int >::type ng(ngSEXP); Rcpp::traits::input_parameter< const IntegerVector& >::type g(gSEXP); Rcpp::traits::input_parameter< bool >::type any_group(any_groupSEXP); Rcpp::traits::input_parameter< bool >::type drop(dropSEXP); rcpp_result_gen = Rcpp::wrap(varyingmCpp(x, ng, g, any_group, drop)); return rcpp_result_gen; END_RCPP } // varyinglCpp SEXP varyinglCpp(const List& x, int ng, const IntegerVector& g, bool any_group, bool drop); RcppExport SEXP _collapse_varyinglCpp(SEXP xSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP any_groupSEXP, SEXP dropSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< const List& >::type x(xSEXP); Rcpp::traits::input_parameter< int >::type ng(ngSEXP); Rcpp::traits::input_parameter< const IntegerVector& >::type g(gSEXP); Rcpp::traits::input_parameter< bool >::type any_group(any_groupSEXP); Rcpp::traits::input_parameter< bool >::type drop(dropSEXP); rcpp_result_gen = Rcpp::wrap(varyinglCpp(x, ng, g, any_group, drop)); return rcpp_result_gen; END_RCPP } collapse/src/data.table_utils.c0000644000176200001440000004221614176642305016243 0ustar liggesusers/* This code is adapted from the data.table package: http://r-datatable.com and licensed under a Mozilla Public License 2.0 (MPL-2.0) license. */ #include "data.table.h" #include bool allNA(SEXP x, bool errorForBadType) { // less space and time than all(is.na(x)) at R level because that creates full size is.na(x) first before all() // whereas this allNA can often return early on testing the first value without reading the rest const int n = length(x); if (n==0) // empty vectors (including raw(), NULL, and list()) same as R's all(is.na()) true result; tests 2116.* return true; switch (TYPEOF(x)) { case RAWSXP: // raw doesn't support NA so always false (other than length 0 case above) return false; case LGLSXP: case INTSXP: { const int *xd = INTEGER(x); for (int i=0; i != n; ++i) if (xd[i]!=NA_INTEGER) { return false; } return true; } case REALSXP: if (INHERITS(x,char_integer64)) { const int64_t *xd = (int64_t *)REAL(x); for (int i=0; i != n; ++i) if (xd[i]!=NA_INTEGER64) { return false; } } else { const double *xd = REAL(x); for (int i=0; i != n; ++i) if (!ISNAN(xd[i])) { return false; } } return true; case STRSXP: { const SEXP *xd = STRING_PTR(x); for (int i=0; i != n; ++i) if (xd[i]!=NA_STRING) { return false; } return true; }} if (!errorForBadType) return false; error("Unsupported type '%s' passed to allNA()", type2char(TYPEOF(x))); // e.g. VECSXP; tests 2116.16-18 // turned off allNA list support for now to avoid accidentally using it internally where we did not intend; allNA not yet exported // https://github.com/Rdatatable/data.table/pull/3909#discussion_r329065950 } SEXP allNAv(SEXP x, SEXP errorForBadType) { return ScalarLogical(allNA(x, asLogical(errorForBadType))); } inline bool INHERITS(SEXP x, SEXP char_) { // Thread safe inherits() by pre-calling install() in init.c and then // passing those char_* in here for simple and fast non-API pointer compare. // The thread-safety aspect here is only currently actually needed for list columns in // fwrite() where the class of the cell's vector is tested; the class of the column // itself is pre-stored by fwrite (for example in isInteger64[] and isITime[]). // Thread safe in the limited sense of correct and intended usage : // i) no API call such as install() or mkChar() must be passed in. // ii) no attrib writes must be possible in other threads. SEXP klass; if (isString(klass = getAttrib(x, R_ClassSymbol))) { for (int i=0; i make.levels -> rbindlist -> [ -> [.data.table // Perhaps related to row names and the copyMostAttrib() below is not quite sufficient size_t n = XLENGTH(x); SEXP ans = PROTECT(allocVector(TYPEOF(x), XLENGTH(x))); switch (TYPEOF(ans)) { case RAWSXP: memcpy(RAW(ans), RAW(x), n*sizeof(Rbyte)); // # nocov; add coverage when ALTREP is turned on for all types break; // # nocov case LGLSXP: memcpy(LOGICAL(ans), LOGICAL(x), n*sizeof(Rboolean)); // # nocov break; // # nocov case INTSXP: memcpy(INTEGER(ans), INTEGER(x), n*sizeof(int)); // covered by 10:1 after test 178 break; case REALSXP: memcpy(REAL(ans), REAL(x), n*sizeof(double)); // covered by as.Date("2013-01-01")+seq(1,1000,by=10) after test 1075 break; case CPLXSXP: memcpy(COMPLEX(ans), COMPLEX(x), n*sizeof(Rcomplex)); // # nocov break; // # nocov case STRSXP: { const SEXP *xp=STRING_PTR(x); // covered by as.character(as.hexmode(1:500)) after test 642 for (R_xlen_t i=0; i != n; ++i) SET_STRING_ELT(ans, i, xp[i]); } break; case VECSXP: { const SEXP *xp=SEXPPTR(x); // # nocov for (R_xlen_t i=0; i != n; ++i) SET_VECTOR_ELT(ans, i, xp[i]); // # nocov } break; // # nocov default: error("Internal error: unsupported type '%s' passed to copyAsPlain()", type2char(TYPEOF(x))); // # nocov } copyMostAttrib(x, ans); // e.g. factor levels, class etc, but not names, dim or dimnames if (ALTREP(ans)) error("Internal error: type '%s' passed to copyAsPlain() but it seems copyMostAttrib() retains ALTREP attributes", type2char(TYPEOF(x))); // # nocov UNPROTECT(1); return ans; } /* No longer needed in GRP.default ! -> group sizes are now directly calculated by radixsort! SEXP uniqlengths(SEXP x, SEXP n) { // seems very similar to rbindlist.c:uniq_lengths. TODO: centralize into common function if (TYPEOF(x) != INTSXP) error("Input argument 'x' to 'uniqlengths' must be an integer vector"); if (TYPEOF(n) != INTSXP || length(n) != 1) error("Input argument 'n' to 'uniqlengths' must be an integer vector of length 1"); R_len_t len = length(x); SEXP ans = PROTECT(allocVector(INTSXP, len)); for (R_len_t i=1; i0) INTEGER(ans)[len-1] = INTEGER(n)[0] - INTEGER(x)[len-1] + 1; UNPROTECT(1); return(ans); } */ // from data.table_frank.c -> simplified frank, only dense method !! SEXP dt_na(SEXP x, SEXP cols) { int n=0, elem; if (!isNewList(x)) error("Internal error. Argument 'x' to Cdt_na is type '%s' not 'list'", type2char(TYPEOF(x))); // # nocov if (!isInteger(cols)) error("Internal error. Argument 'cols' to Cdt_na is type '%s' not 'integer'", type2char(TYPEOF(cols))); // # nocov for (int i=0; iLENGTH(x)) error("Item %d of 'cols' is %d which is outside 1-based range [1,ncol(x)=%d]", i+1, elem, LENGTH(x)); if (!n) n = length(VECTOR_ELT(x, elem-1)); } SEXP ans = PROTECT(allocVector(LGLSXP, n)); int *ians = LOGICAL(ans); for (int i=0; i != n; ++i) ians[i]=0; for (int i=0; i 0) { switch(asInteger(dns)) { case 0: // Not Sorted k=1; for (i = 0; i != ng; i++) { for (j = xstart[i]-1, end = xstart[i]+xlen[i]-1; j < end; j++) ians[xorder[j]-1] = k; k++; } break; case 1: // Sorted k=1; for (i = 0; i != ng; i++) { for (j = xstart[i]-1, end = xstart[i]+xlen[i]-1; j < end; j++) ians[j] = k; k++; } break; case 2: // This is basically run-length type group-id for (i = 0; i != ng; i++) { k=1; for (j = xstart[i]-1, end = xstart[i]+xlen[i]-1; j < end; j++) ians[xorder[j]-1] = k++; } break; default: error("dns must be 0, 1 or 2"); } } UNPROTECT(1); return(ans); } // extern SEXP char_integer64; // internal version of anyNA for data.tables // SEXP anyNA(SEXP x, SEXP cols) { // int i, j, n=0, elem; // // if (!isNewList(x)) error("Internal error. Argument 'x' to CanyNA is type '%s' not 'list'", type2char(TYPEOF(x))); // #nocov // if (!isInteger(cols)) error("Internal error. Argument 'cols' to CanyNA is type '%s' not 'integer'", type2char(TYPEOF(cols))); // # nocov // for (i=0; iLENGTH(x)) // error("Item %d of 'cols' is %d which is outside 1-based range [1,ncol(x)=%d]", i+1, elem, LENGTH(x)); // if (!n) n = length(VECTOR_ELT(x, elem-1)); // } // SEXP ans = PROTECT(allocVector(LGLSXP, 1)); // LOGICAL(ans)[0]=0; // for (i=0; incol) error("Internal error: o passed to Csetcolorder contains an NA or out-of-bounds"); // # nocov if (seen[od[i]-1]) error("Internal error: o passed to Csetcolorder contains a duplicate"); // # nocov seen[od[i]-1] = true; } Free(seen); SEXP *tmp = Calloc(ncol, SEXP); SEXP *xd = SEXPPTR(x), *namesd = STRING_PTR(names); for (int i=0; i != ncol; ++i) tmp[i] = xd[od[i]-1]; memcpy(xd, tmp, ncol*sizeof(SEXP)); // sizeof is type size_t so no overflow here for (int i=0; i != ncol; ++i) tmp[i] = namesd[od[i]-1]; memcpy(namesd, tmp, ncol*sizeof(SEXP)); // No need to change key (if any); sorted attribute is column names not positions Free(tmp); return(R_NilValue); } SEXP keepattr(SEXP to, SEXP from) { // Same as R_copyDFattr in src/main/attrib.c, but that seems not exposed in R's api // Only difference is that we reverse from and to in the prototype, for easier calling above SET_ATTRIB(to, ATTRIB(from)); IS_S4_OBJECT(from) ? SET_S4_OBJECT(to) : UNSET_S4_OBJECT(to); SET_OBJECT(to, OBJECT(from)); return to; } SEXP growVector(SEXP x, R_len_t newlen) { // Similar to EnlargeVector in src/main/subassign.c, with the following changes : // * replaced switch and loops with one memcpy for INTEGER and REAL, but need to age CHAR and VEC. // * no need to cater for names // * much shorter and faster SEXP newx; R_len_t i, len = length(x); if (isNull(x)) error("growVector passed NULL"); PROTECT(newx = allocVector(TYPEOF(x), newlen)); // TO DO: R_realloc(?) here? if (newlen < len) len=newlen; // i.e. shrink switch (TYPEOF(x)) { case STRSXP : for (i=0; i %\VignetteIndexEntry{collapse Documentation and Resources} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- *collapse* is a C/C++ based package for data transformation and statistical computing in R. It's aims are: 1. To facilitate complex data transformation, exploration and computing tasks in R. 2. To help make R code fast, flexible, parsimonious and programmer friendly. Documentation comes in 4 different forms: ## Built-In Structured Documentation After installing *collapse*, you can call `help("collapse-documentation")` which will produce a central help page providing a broad overview of the entire functionality of the package, including direct links to all function documentation pages and links to 13 further topical documentation pages describing how clusters of related functions work together. The names of these additional help pages are contained in a global macro `.COLLAPSE_TOPICS` and can so easily be called from the R console as well. Function documentation is interlinked with the relevant topical pages, and all documentation pages link back to the central overview page at `help("collapse-documentation")`. Thus *collapse* comes with a fully structured hierarchical documentation which you can browse within R - and that provides everything necessary to fully understand the package. The Documentation is also available [online](). The package page under `help("collapse-package")` additionally provides some more general information about the package and its design philosophy, as well as a very compact set of examples covering important functionality (which lack features introduced in 1.7 though). Reading `help("collapse-package")` and `help("collapse-documentation")` and working through the examples on `help("collapse-package")` is the fastest way to get acquainted with the package. `help("collapse-documentation")` is also the most up-to-date documentation of the package at the time of releasing 1.7 (January 2022). ## Vignettes There are also 5 vignettes which are available [online]() (due to their size and the enhanced browsing experience on the website). The vignettes are: * [**Introduction to *collapse* **](): Introduces all main features in a structured way * [***collapse* and *dplyr* **](): Demonstrates the integration of collapse with *dplyr* / *tidyverse* workflows and associated performance improvements * [***collapse* and *plm***](): Demonstrates the integration of collapse with *plm* and shows examples of efficient programming with panel data * [***collapse* and *data.table***](): Shows how collapse and *data.table* may be used together in a harmonious way * [***collapse* and *sf***](): Shows how collapse can be used to efficiently manipulate *sf* data frames Note that these vignettes currently (January 2022) do not cover features introduced in version 1.7. They have been updated if you see a 2022 in the date of the vignette. ## Blog I maintain a [blog]() linked to [Rbloggers.com]() where I introduced *collapse* with some compact posts covering central functionality. Among these, the post about [programming with *collapse*]() is highly recommended for ambitious users and developers willing to build on *collapse*, as it exposes to some degree how central parts of *collapse* work together and provides tips on how to write very efficient *collapse* code. Future blog posts will expose some specialized functionality in more detail. ## Cheatsheet Finally, there is a [cheatsheet]() at Rstudio that compactly summarizes the collapse function space, similar to `help("collapse-documentation")`. This one will be updated shortly. collapse/R/0000755000176200001440000000000014201327077012260 5ustar liggesuserscollapse/R/small_helper.R0000644000176200001440000010233614174223734015063 0ustar liggesusers# Functions needed for internal use because of option(collapse_mask = "fast-stat-fun") bsum <- base::sum bprod <- base::prod bmin <- base::min bmax <- base::max # Row-operations (documented under data transformations...) ... see if any other package has it (i.e. matrixStats etc..) # or wirhout r ??? look for %+% function on Rducumentation.. rdio. "%rr%" <- function(X, v) if(is.atomic(X) || is.atomic(v) || inherits(X, "data.frame")) TRA(X, v, "replace_fill") else # outer(rep.int(1L, dim(X)[2L]), v) duplAttributes(mapply(function(x, y) TRA(x, y, "replace_fill"), unattrib(X), unattrib(v), USE.NAMES = FALSE, SIMPLIFY = FALSE), X) "%r+%" <- function(X, v) if(is.atomic(X) || is.atomic(v) || inherits(X, "data.frame")) TRA(X, v, "+") else duplAttributes(mapply(function(x, y) TRA(x, y, "+"), unattrib(X), unattrib(v), USE.NAMES = FALSE, SIMPLIFY = FALSE), X) "%r-%" <- function(X, v) if(is.atomic(X) || is.atomic(v) || inherits(X, "data.frame")) TRA(X, v, "-") else duplAttributes(mapply(function(x, y) TRA(x, y, "-"), unattrib(X), unattrib(v), USE.NAMES = FALSE, SIMPLIFY = FALSE), X) "%r*%" <- function(X, v) if(is.atomic(X) || is.atomic(v) || inherits(X, "data.frame")) TRA(X, v, "*") else duplAttributes(mapply(function(x, y) TRA(x, y, "*"), unattrib(X), unattrib(v), USE.NAMES = FALSE, SIMPLIFY = FALSE), X) "%r/%" <- function(X, v) if(is.atomic(X) || is.atomic(v) || inherits(X, "data.frame")) TRA(X, v, "/") else duplAttributes(mapply(function(x, y) TRA(x, y, "/"), unattrib(X), unattrib(v), USE.NAMES = FALSE, SIMPLIFY = FALSE), X) # othidentity <- function(x, y) y "%cr%" <- function(X, V) if(is.atomic(X)) return(duplAttributes(rep(V, NCOL(X)), X)) else # outer(rep.int(1L, dim(X)[2L]), V) if(is.atomic(V)) return(duplAttributes(lapply(vector("list", length(unclass(X))), function(z) V), X)) else copyAttrib(V, X) # copyAttrib first makes a shallow copy of V "%c+%" <- function(X, V) if(is.atomic(X)) return(X + V) else duplAttributes(if(is.atomic(V)) lapply(unattrib(X), `+`, V) else mapply(`+`, unattrib(X), unattrib(V), USE.NAMES = FALSE, SIMPLIFY = FALSE), X) "%c-%" <- function(X, V) if(is.atomic(X)) return(X - V) else duplAttributes(if(is.atomic(V)) lapply(unattrib(X), `-`, V) else mapply(`-`, unattrib(X), unattrib(V), USE.NAMES = FALSE, SIMPLIFY = FALSE), X) "%c*%" <- function(X, V) if(is.atomic(X)) return(X * V) else duplAttributes(if(is.atomic(V)) lapply(unattrib(X), `*`, V) else mapply(`*`, unattrib(X), unattrib(V), USE.NAMES = FALSE, SIMPLIFY = FALSE), X) "%c/%" <- function(X, V) if(is.atomic(X)) return(X / V) else # or * 1L/V ?? duplAttributes(if(is.atomic(V)) lapply(unattrib(X), `/`, V) else mapply(`/`, unattrib(X), unattrib(V), USE.NAMES = FALSE, SIMPLIFY = FALSE), X) # Multiple-assignment "%=%" <- function(nam, values) invisible(.Call(C_multiassign, nam, values, parent.frame())) massign <- function(nam, values, envir = parent.frame()) invisible(.Call(C_multiassign, nam, values, envir)) # R implementation: # "%=%" <- function(lhs, rhs) { # if(!is.character(lhs)) stop("lhs needs to be character") # if(!is.list(rhs)) rhs <- as.vector(rhs, "list") # if(length(lhs) != length(rhs)) stop("length(lhs) not equal to length(rhs)") # list2env(`names<-`(rhs, lhs), envir = parent.frame(), # parent = NULL, hash = FALSE, size = 0L) # invisible() # } group <- function(x, starts = FALSE, group.sizes = FALSE) `oldClass<-`(.Call(C_group, x, starts, group.sizes), c("qG", "na.included")) gsplit <- function(x = NULL, g, use.g.names = FALSE, ...) { if(!inherits(g, "GRP")) g <- GRP(g, return.groups = use.g.names, call = FALSE, ...) res <- if(is.null(x)) .Call(C_gsplit, 1L, g, TRUE) else if(length(unclass(x)) == length(g[[2L]])) .Call(C_gsplit, x, g, FALSE) else if(is.object(x)) lapply(.Call(C_gsplit, 1L, g, TRUE), function(i) x[i]) else stop("length(x) must match length(g)") if(use.g.names) names(res) <- GRPnames(g, FALSE) res } greorder <- function(x, g) .Call(C_greorder, x, g) getenvFUN <- function(nam, efmt1 = "For this method need to install.packages('%s'), then unload [detach('package:collapse', unload = TRUE)] and reload [library(collapse)].") { if(is.null(FUN <- .collapse_env[[nam]])) { v <- strsplit(nam, "_", fixed = TRUE)[[1L]] .collapse_env[[nam]] <- FUN <- if(requireNamespace(v[1L], quietly = TRUE)) get0(v[2L], envir = getNamespace(v[1L])) else NULL if(is.null(FUN)) stop(sprintf(efmt1, v[1L])) } FUN } getpix <- function(x) switch(typeof(x), externalptr = .Call(C_geteptr, x), x) # getplmindex <- function(x) { # ix <- getpix(attr(x, "index")) # if(is.list(ix)) return(ix) # .Call(C_geteptr, ix) # } # qM2 <- function(x) if(is.list(x)) do.call(cbind, x) else x null2NA <- function(x) if(is.null(x)) NA_character_ else x # flapply <- function(x, FUN, ...) lapply(unattrib(x), FUN, ...) # not really needed ... vlabels <- function(X, attrn = "label", use.names = TRUE) .Call(C_vlabels, X, attrn, use.names) # { # if(is.atomic(X)) return(null2NA(attr(X, attrn))) # res <- lapply(X, attr, attrn) # unattrib(X): no names # res[vapply(res, is.null, TRUE)] <- NA_character_ # unlist(res) # } "vlabels<-" <- function(X, attrn = "label", value) { if(is.atomic(X)) return(`attr<-`(X, attrn, value)) .Call(C_setvlabels, X, attrn, value, NULL) } # "vlabels<-" <- function(X, attrn = "label", value) { # names(value) <- NULL # if(is.atomic(X)) return(`attr<-`(X, attrn, value)) # clx <- oldClass(X) # oldClass(X) <- NULL # if(is.null(value)) { # for (i in seq_along(X)) attr(X[[i]], attrn) <- NULL # } else { # if(length(X) != length(value)) stop("length(X) must match length(value)") # for (i in seq_along(value)) attr(X[[i]], attrn) <- value[[i]] # } # if(any(clx == "data.table")) return(alc(`oldClass<-`(X, clx))) # `oldClass<-`(X, clx) # } # Note: Shallow copy does not work as it only copies the list, but the attribute is a feature of the atomic elements inside... setLabels <- function(X, value, attrn = "label", cols = NULL) { # , sc = TRUE if(is.atomic(X)) return(`attr<-`(X, attrn, value)) .Call(C_setvlabels, X, attrn, value, as.integer(cols)) } # Also slower on WDI !! # "vlabels2<-" <- function(X, attrn = "label", value) { # names(value) <- NULL # if(is.atomic(X)) return(`attr<-`(X, attrn, value)) # duplAttributes(mapply(function(x, y) `attr<-`(x, attrn, y), `attributes<-`(X, NULL), as.vector(value, "list"), # SIMPLIFY = FALSE, USE.NAMES = FALSE), X) # } .c <- function(...) as.character(substitute(c(...))[-1L]) strclp <- function(x) if(length(x) > 1L) paste(x, collapse = " ") else x pasteclass <- function(x) if(length(cx <- class(x)) > 1L) paste(cx, collapse = " ") else cx vclasses <- function(X, use.names = TRUE) { if(is.atomic(X)) return(pasteclass(X)) vapply(X, pasteclass, character(1L), USE.NAMES = use.names) # unattrib(X): no names } # https://github.com/wch/r-source/blob/4a409a1a244d842a3098d2783c5b63c9661fc6be/src/main/util.c R_types <- c("NULL", # NILSXP "symbol", # SYMSXP "pairlist", # LISTSXP "closure", # CLOSXP "environment", # ENVSXP "promise", # PROMSXP "language", # LANGSXP "special", # SPECIALSXP "builtin", # BUILTINSXP "char", # CHARSXP "logical", # LGLSXP "", "", "integer", # INTSXP "double", # REALSXP "complex", # CPLXSXP "character", # STRSXP "...", # DOTSXP "any", # ANYSXP "list", # VECSXP "expression", # EXPRSXP "bytecode", # BCODESXP "externalptr", # EXTPTRSXP "weakref", # WEAKREFSXP "raw", # RAWSXP "S4") # S4SXP # /* aliases : */ # { "numeric", REALSXP }, # { "name", SYMSXP }, vtypes <- function(X, use.names = TRUE) { if(is.atomic(X)) return(typeof(X)) res <- R_types[.Call(C_vtypes, X, 0L)] if(use.names) names(res) <- attr(X, "names") res # vapply(X, typeof, character(1L)) # unattrib(X): no names } vlengths <- function(X, use.names = TRUE) .Call(C_vlengths, X, use.names) namlab <- function(X, class = FALSE, attrn = "label", N = FALSE, Ndistinct = FALSE) { if(!is.list(X)) stop("namlab only works with lists") res <- list(Variable = attr(X, "names")) attributes(X) <- NULL if(class) res$Class <- vapply(X, pasteclass, character(1), USE.NAMES = FALSE) if(N) res$N <- fnobs.data.frame(X) if(Ndistinct) res$Ndist <- fndistinct.data.frame(X) res$Label <- vlabels(X, attrn, FALSE) attr(res, "row.names") <- c(NA_integer_, -length(X)) oldClass(res) <- "data.frame" res } add_stub <- function(X, stub, pre = TRUE, cols = NULL) { if(!is.character(stub)) return(X) if(is.atomic(X) && is.array(X)) { if(length(dim(X)) > 2L) stop("Can't stub higher dimensional arrays!") dn <- dimnames(X) cn <- dn[[2L]] if(length(cn)) { if(length(cols)) cn[cols] <- if(pre) paste0(stub, cn[cols]) else paste0(cn[cols], stub) else cn <- if(pre) paste0(stub, cn) else paste0(cn, stub) dimnames(X) <- list(dn[[1L]], cn) } } else { nam <- attr(X, "names") if(length(nam)) { if(length(cols)) attr(X, "names")[cols] <- if(pre) paste0(stub, nam[cols]) else paste0(nam[cols], stub) else attr(X, "names") <- if(pre) paste0(stub, nam) else paste0(nam, stub) if(inherits(X, "data.table")) X <- alc(X) } } X } rm_stub <- function(X, stub, pre = TRUE, regex = FALSE, cols = NULL, ...) { if(!is.character(stub)) return(X) if(regex) rmstubFUN <- function(x) { gsub(stub, "", x, ...) } else if(pre) rmstubFUN <- function(x) { # much faster than using sub! v <- startsWith(x, stub) x[v] <- substr(x[v], nchar(stub)+1L, 1000000L) x } else rmstubFUN <- function(x) { # much faster than using sub! v <- endsWith(x, stub) xv <- x[v] # faster .. x[v] <- substr(xv, 0L, nchar(xv)-nchar(stub)) x } if(is.atomic(X)) { d <- dim(X) if(is.null(d)) if(is.character(X)) return(if(length(cols)) replace(X, cols, rmstubFUN(X[cols])) else rmstubFUN(X)) else stop("Cannot modify a vector that is not character") if(length(d) > 2L) stop("Can't remove stub from higher dimensional arrays!") dn <- dimnames(X) cn <- dn[[2L]] dimnames(X) <- list(dn[[1L]], if(length(cols)) replace(cn, cols, rmstubFUN(cn[cols])) else rmstubFUN(cn)) } else { nam <- attr(X, "names") attr(X, "names") <- if(length(cols)) replace(nam, cols, rmstubFUN(nam[cols])) else rmstubFUN(nam) if(inherits(X, "data.table")) X <- alc(X) } X } setRownames <- function(object, nm = if(is.atomic(object)) seq_row(object) else NULL) { if(is.list(object)) { l <- length(.subset2(object, 1L)) if(is.null(nm)) nm <- .set_row_names(l) else if(length(nm) != l) stop("supplied row-names must match list extent") attr(object, "row.names") <- nm if(inherits(object, "data.table")) return(alc(object)) return(object) } if(!is.array(object)) stop("Setting row-names only supported on arrays and lists") dn <- dimnames(object) `dimnames<-`(object, c(list(nm), dn[-1L])) } setColnames <- function(object, nm) { if(is.atomic(object) && is.array(object)) dimnames(object)[[2L]] <- nm else { attr(object, "names") <- nm if(inherits(object, "data.table")) return(alc(object)) } object } setDimnames <- function(object, dn, which = NULL) { if(is.null(which)) return(`dimnames<-`(object, dn)) if(is.atomic(dn)) dimnames(object)[[which]] <- dn else dimnames(object)[which] <- dn object } all_identical <- function(...) { if(...length() == 1L && is.list(...)) return(all(vapply(unattrib(...)[-1L], identical, TRUE, .subset2(..., 1L)))) l <- list(...) all(vapply(l[-1L], identical, TRUE, l[[1L]])) } all_obj_equal <- function(...) { if(...length() == 1L && is.list(...)) r <- unlist(lapply(unattrib(...)[-1L], all.equal, .subset2(..., 1L)), use.names = FALSE) else { l <- list(...) r <- unlist(lapply(l[-1L], all.equal, l[[1L]]), use.names = FALSE) } is.logical(r) } cinv <- function(x) chol2inv(chol(x)) # TODO: Use outer here for simple case... interact_names <- function(l) do.call(paste, c(expand.grid(l, KEEP.OUT.ATTRS = FALSE, stringsAsFactors = FALSE), list(sep = "."))) # set over-allocation for data.table's alc <- function(x) .Call(C_alloccol, x) condalc <- function(x, DT) if(DT) .Call(C_alloccol, x) else x alcSA <- function(x, a) .Call(C_alloccol, .Call(C_setAttributes, x, a)) condalcSA <- function(x, a, DT) if(DT) .Call(C_alloccol, .Call(C_setAttributes, x, a)) else .Call(C_setAttributes, x, a) unattrib <- function(object) `attributes<-`(object, NULL) # Both equally efficient and therefore redundant ! # setAttr <- function(object, a, v) .Call(C_setAttr, object, a, v) # setAttrR <- function(object, a, v) `attr<-`(object, a, v) setAttrib <- function(object, a) .Call(C_setAttrib, object, a) # setAttribR <- function(object, a) `attributes<-`(object, x) copyAttrib <- function(to, from) .Call(C_copyAttrib, to, from) # copyAttribR <- function(to, from) `attributes<-`(to, attributes(from)) copyMostAttrib <- function(to, from) .Call(C_copyMostAttrib, to, from) # copyMostAttribR <- function(to, from) `mostattributes<-`(to, attributes(from)) addAttributes <- function(x, a) .Call(C_setAttributes, x, c(attributes(x), a)) is_categorical <- function(x) !is.numeric(x) is.categorical <- function(x) { message("Note that 'is.categorical' was renamed to 'is_categorical'. It will not be removed anytime soon, but please use updated function names in new code, see help('collapse-renamed')") !is.numeric(x) } is_date <- function(x) inherits(x, c("Date","POSIXlt","POSIXct")) is.Date <- function(x) { message("Note that 'is.Date' was renamed to 'is_date'. It will not be removed anytime soon, but please use updated function names in new code, see help('collapse-renamed')") inherits(x, c("Date","POSIXlt","POSIXct")) } "%!in%" <- function(x, table) match(x, table, nomatch = 0L) == 0L # more consistent with base than na_rm # na.rm <- function(x) { # cpp version available, but not faster ! # if(length(attr(x, "names"))) { # gives corruped time-series ! # ax <- attributes(x) # r <- x[!is.na(x)] # ax[["names"]] <- names(r) # setAttributes(r, ax) # } else duplAttributes(x[!is.na(x)], x) # } whichv <- function(x, value, invert = FALSE) .Call(C_whichv, x, value, invert) "%==%" <- function(x, value) .Call(C_whichv, x, value, FALSE) "%!=%" <- function(x, value) .Call(C_whichv, x, value, TRUE) whichNA <- function(x, invert = FALSE) .Call(C_whichv, x, NA, invert) alloc <- function(value, n) .Call(C_alloc, value, n) allNA <- function(x) .Call(C_allNA, x, TRUE) # True means give error for unsupported vector types, not FALSE. anyv <- function(x, value) .Call(C_anyallv, x, value, FALSE) allv <- function(x, value) .Call(C_anyallv, x, value, TRUE) copyv <- function(X, v, R, ..., invert = FALSE, vind1 = FALSE) { if(is.list(X, ...)) { # Making sure some error is produced if dots are used if(is.list(R)) { res <- mapply(function(x, r) .Call(C_setcopyv, x, v, r, invert, FALSE, vind1), unattrib(X), unattrib(R), USE.NAMES = FALSE, SIMPLIFY = FALSE) } else { res <- lapply(unattrib(X), function(x) .Call(C_setcopyv, x, v, R, invert, FALSE, vind1)) } return(condalc(duplAttributes(res, X), inherits(X, "data.table"))) } .Call(C_setcopyv, X, v, R, invert, FALSE, vind1) } setv <- function(X, v, R, ..., invert = FALSE, vind1 = FALSE) { if(is.list(X, ...)) { # Making sure some error is produced if dots are used if(is.list(R)) { mapply(function(x, r) .Call(C_setcopyv, x, v, r, invert, TRUE, vind1), unattrib(X), unattrib(R), USE.NAMES = FALSE, SIMPLIFY = FALSE) } else { lapply(unattrib(X), function(x) .Call(C_setcopyv, x, v, R, invert, TRUE, vind1)) } return(invisible(X)) } invisible(.Call(C_setcopyv, X, v, R, invert, TRUE, vind1)) } setop <- function(X, op, V, ..., rowwise = FALSE) # Making sure some error is produced if dots are used invisible(.Call(C_setop, X, V, switch(op, "+" = 1L, "-" = 2L, "*" = 3L, "/" = 4L, stop("Unsupported operation:", op)), rowwise), ...) "%+=%" <- function(X, V) invisible(.Call(C_setop, X, V, 1L, FALSE)) "%-=%" <- function(X, V) invisible(.Call(C_setop, X, V, 2L, FALSE)) "%*=%" <- function(X, V) invisible(.Call(C_setop, X, V, 3L, FALSE)) "%/=%" <- function(X, V) invisible(.Call(C_setop, X, V, 4L, FALSE)) missing_cases <- function(X, cols = NULL) { if(is.list(X)) return(.Call(C_dt_na, X, if(is.null(cols)) seq_along(unclass(X)) else cols2int(cols, X, attr(X, "names")))) if(is.matrix(X)) return(if(is.null(cols)) !complete.cases(X) else !complete.cases(X[, cols])) is.na(X) } na_rm <- function(x) .Call(C_na_rm, x) # x[!is.na(x)] # Also takes names along, whereas na_rm does not preserve names of list null_rm <- function(l) if(!all(ind <- vlengths(l, FALSE) > 0L)) .subset(l, ind) else l all_eq <- function(x) .Call(C_anyallv, x, x[1L], TRUE) na_omit <- function(X, cols = NULL, na.attr = FALSE) { if(is.list(X)) { iX <- seq_along(unclass(X)) rl <- if(is.null(cols)) !.Call(C_dt_na, X, iX) else !.Call(C_dt_na, X, cols2int(cols, X, attr(X, "names"))) # gives error if X not list rkeep <- which(rl) if(length(rkeep) == fnrow2(X)) return(condalc(X, inherits(X, "data.table"))) res <- .Call(C_subsetDT, X, rkeep, iX, FALSE) rn <- attr(X, "row.names") if(!(is.numeric(rn) || is.null(rn) || rn[1L] == "1")) attr(res, "row.names") <- rn[rkeep] if(na.attr) { attr(res, "na.action") <- `oldClass<-`(whichv(rl, FALSE), "omit") if(inherits(res, "data.table")) return(alc(res)) } } else { rl <- if(is.null(cols)) complete.cases(X) else complete.cases(X[, cols]) rkeep <- which(rl) if(length(rkeep) == NROW(X)) return(X) res <- if(is.matrix(X)) X[rkeep, , drop = FALSE] else X[rkeep] if(na.attr) attr(res, "na.action") <- `oldClass<-`(whichv(rl, FALSE), "omit") } res } na_insert <- function(X, prop = 0.1, value = NA) { if(is.list(X)) { n <- fnrow2(X) nmiss <- floor(n * prop) res <- duplAttributes(lapply(unattrib(X), function(y) `[<-`(y, sample.int(n, nmiss), value = value)), X) return(if(inherits(X, "data.table")) alc(res) else res) } if(!is.atomic(X)) stop("X must be an atomic vector, array or data.frame") l <- length(X) X[sample.int(l, floor(l * prop))] <- value X } fdapply <- function(X, FUN, ...) duplAttributes(lapply(`attributes<-`(X, NULL), FUN, ...), X) fnlevels <- function(x) length(attr(x, "levels")) # flevels <- function(x) attr(x, "levels") fnrow <- function(X) if(is.list(X)) length(.subset2(X, 1L)) else dim(X)[1L] fnrow2 <- function(X) length(.subset2(X, 1L)) fncol <- function(X) if(is.list(X)) length(unclass(X)) else dim(X)[2L] fNCOL <- function(X) if(is.list(X)) length(unclass(X)) else NCOL(X) fdim <- function(X) { if(is.atomic(X)) return(dim(X)) # or if !is.list ? oldClass(X) <- NULL c(length(X[[1L]]), length(X)) # Faster than c(length(.subset2(X, 1L)), length(unclass(X))) } seq_row <- function(X) if(is.list(X)) seq_along(.subset2(X, 1L)) else seq_len(dim(X)[1L]) seq_col <- function(X) if(is.list(X)) seq_along(unclass(X)) else seq_len(dim(X)[2L]) # na.last is false !! forder.int <- function(x) .Call(C_radixsort, FALSE, FALSE, FALSE, FALSE, TRUE, pairlist(x)) # if(is.unsorted(x)) .Call(C_forder, x, NULL, FALSE, TRUE, 1L, TRUE) else seq_along(x) # since forder gives integer(0) if sorted ! ford <- function(x, g = NULL) { if(!is.null(g)) { x <- c(if(is.atomic(g)) list(g) else if(is_GRP(g)) g[2L] else g, if(is.atomic(x)) list(x) else x, list(method = "radix")) return(do.call(order, x)) } if(is.list(x)) return(do.call(order, c(x, list(method = "radix")))) if(length(x) < 1000L) .Call(C_radixsort, TRUE, FALSE, FALSE, FALSE, TRUE, pairlist(x)) else order(x, method = "radix") } fsetdiff <- function(x, y) x[match(x, y, 0L) == 0L] # not unique ! ffka <- function(x, f) { ax <- attributes(x) `attributes<-`(f(ax[["levels"]])[x], ax[names(ax) %!in% c("levels", "class")]) } as_numeric_factor <- function(X, keep.attr = TRUE) { if(is.atomic(X)) if(keep.attr) return(ffka(X, as.numeric)) else return(as.numeric(attr(X, "levels"))[X]) res <- duplAttributes(lapply(unattrib(X), if(keep.attr) (function(y) if(is.factor(y)) ffka(y, as.numeric) else y) else (function(y) if(is.factor(y)) as.numeric(attr(y, "levels"))[y] else y)), X) if(inherits(X, "data.table")) return(alc(res)) res } as_character_factor <- function(X, keep.attr = TRUE) { if(is.atomic(X)) if(keep.attr) return(ffka(X, tochar)) else return(as.character.factor(X)) res <- duplAttributes(lapply(unattrib(X), if(keep.attr) (function(y) if(is.factor(y)) ffka(y, tochar) else y) else (function(y) if(is.factor(y)) as.character.factor(y) else y)), X) if(inherits(X, "data.table")) return(alc(res)) res } as.numeric_factor <- function(X, keep.attr = TRUE) { message("Note that 'as.numeric_factor' was renamed to 'as_numeric_factor'. It will not be removed anytime soon, but please use updated function names in new code, see help('collapse-renamed')") as_numeric_factor(X, keep.attr) } as.character_factor <- function(X, keep.attr = TRUE) { message("Note that 'as.character_factor' was renamed to 'as_character_factor'. It will not be removed anytime soon, but please use updated function names in new code, see help('collapse-renamed')") as_character_factor(X, keep.attr) } setRnDF <- function(df, nm) `attr<-`(df, "row.names", nm) TtI <- function(x) switch(x, replace_fill = 1L, replace = 2L, `-` = 3L, `-+` = 4L, `/` = 5L, `%` = 6L, `+` = 7L, `*` = 8L, `%%` = 9L, `-%%` = 10L, stop("Unknown transformation!")) condsetn <- function(x, value, cond) { if(cond) attr(x, "names") <- value x } setnck <- function(x, value) { if(is.null(value)) return(x) ren <- nzchar(value) if(all(ren)) names(x) <- value else names(x)[ren] <- value[ren] x } # give_nam <- function(x, gn, stub) { # if(!gn) return(x) # attr(x, "names") <- paste0(stub, attr(x, "names")) # x # } ckmatch <- function(x, table, e = "Unknown columns:") if(anyNA(m <- match(x, table))) stop(paste(e, paste(x[is.na(m)], collapse = ", "))) else m # anyNAerror <- function(x, e) if(anyNA(x)) stop(e) else x cols2int <- function(cols, x, nam, topos = TRUE) { if(is.numeric(cols)) { l <- length(unclass(x)) # length(nam) ? if(cols[1L] < 0L) { # This is sufficient to check negative indices: No R function allows subsetting mixing positive and negative indices. if(-bmin(cols) > l) stop("Index out of range abs(1:length(x))") if(topos) return(seq_len(l)[cols]) # cols <- seq_len(l)[cols] # if(!length(cols) || anyNA(cols)) stop("Index out of range abs(1:length(x))") -> used to put earlier check after if(topos) and use this one instead. But turns out that doesn't always work well. # return(cols) } else if(bmax(cols) > l) stop("Index out of range abs(1:length(x))") # if(bmax(abs(cols)) > length(unclass(x))) stop("Index out of range abs(1:length(x))") # Before collapse 1.4.0 ! return(as.integer(cols)) # as.integer is necessary (for C_subsetCols), and at very little cost.. } if(is.character(cols)) return(ckmatch(cols, nam)) if(is.function(cols)) return(which(vapply(unattrib(x), cols, TRUE))) if(is.logical(cols)) { if(length(cols) != length(unclass(x))) stop("Logical subsetting vector must match columns!") # length(nam) ? return(which(cols)) } stop("cols must be a function, character vector, numeric indices or logical vector!") } # Needed for fmutate cols2char <- function(cols, x, nam) { if(is.character(cols)) return(cols) if(!length(cols)) return("") # Needed if NULL is passed if(is.numeric(cols)) { l <- length(nam) if(cols[1L] < 0L) { if(-bmin(cols) > l) stop("Index out of range abs(1:length(x))") } else if(bmax(cols) > l) stop("Index out of range abs(1:length(x))") return(nam[cols]) } if(is.function(cols)) return(nam[vapply(unattrib(x), cols, TRUE)]) if(is.logical(cols)) { if(length(cols) != length(nam)) stop("Logical subsetting vector must match columns!") return(nam[cols]) } stop("cols must be a function, character vector, numeric indices or logical vector!") } # Not needed anymore !! # cols2log <- function(cols, x, nam) { # lx <- length(unclass(x)) # if(is.logical(cols)) if(length(cols) == lx) return(cols) else stop("Logical subsetting vector must match columns!") # if(is.function(cols)) return(vapply(unattrib(x), cols, TRUE)) # r <- logical(lx) # if(is.character(cols)) { # r[ckmatch(cols, nam)] <- TRUE # } else if(is.numeric(cols)) { # if(bmax(abs(cols)) > lx) stop("Index out of range abs(1:length(x))") # r[cols] <- TRUE # } else stop("cols must be a function, character vector, numeric indices or logical vector!") # r # } colsubset <- function(x, ind, checksf = FALSE) { if(is.numeric(ind)) return(.Call(C_subsetCols, x, as.integer(ind), checksf)) if(is.logical(ind)) { nc <- length(unclass(x)) if(length(ind) != nc) stop("Logical subsetting vector must match length(x)") ind <- which(ind) if(length(ind) == nc) return(x) return(.Call(C_subsetCols, x, ind, checksf)) } ind <- if(is.character(ind)) ckmatch(ind, attr(x, "names")) else which(vapply(`attributes<-`(x, NULL), ind, TRUE)) return(.Call(C_subsetCols, x, ind, checksf)) } # Previously Fastest! even though it involves code duplication.. # colsubset <- function(x, ind) { # ax <- attributes(x) # if(is.numeric(ind)) { # attributes(x) <- NULL # note: attributes(x) <- NULL is very slightly faster than class(x) <- NULL # if(bmax(abs(ind)) > length(x)) stop("Index out of range abs(1:length(x))") # ax[["names"]] <- ax[["names"]][ind] # return(.Call(C_setAttributes, x[ind], ax)) # } # if(is.logical(ind)) { # attributes(x) <- NULL # if(length(ind) != length(x)) stop("Logical subsetting vector must match length(x)") # ax[["names"]] <- ax[["names"]][ind] # return(.Call(C_setAttributes, x[ind], ax)) # } # ind <- if(is.character(ind)) ckmatch(ind, ax[["names"]]) else vapply(`attributes<-`(x, NULL), ind, TRUE) # ax[["names"]] <- ax[["names"]][ind] # .Call(C_setAttributes, .subset(x, ind), ax) # } fcolsubset <- function(x, ind, checksf = FALSE) { # fastest ! .Call(C_subsetCols, x, if(is.logical(ind)) which(ind) else as.integer(ind), checksf) # Fastet! becore C version: # ax <- attributes(x) # ax[["names"]] <- ax[["names"]][ind] # .Call(C_setAttributes, .subset(x, ind), ax) } # Sorted out 1.5.3 -> 1.6.0: # Fastest because vapply runs faster on a list without any attributes ! # colsubsetFUN <- function(x, FUN) { # .Call(C_subsetCols, x, which(vapply(`attributes<-`(x, NULL), FUN, TRUE))) # # Fastet! becore C version: # # ax <- attributes(x) # # attributes(x) <- NULL # # ind <- vapply(x, FUN, TRUE) # # ax[["names"]] <- ax[["names"]][ind] # # .Call(C_setAttributes, x[ind], ax) # } G_guo <- function(g) { if(is.atomic(g)) { if(inherits(g, c("factor", "qG"))) { if(inherits(g, "na.included") || !anyNA(unclass(g))) return(list(if(is.factor(g)) fnlevels(g) else attr(g, "N.groups"), unattrib(g), NULL)) if(is.factor(g)) { ng <- if(anyNA(lev <- attr(g, "levels"))) length(lev) else length(lev) + 1L } else ng <- attr(g, "N.groups") + 1L return(list(ng, copyv(unattrib(g), NA_integer_, ng), NULL)) } g <- .Call(C_group, g, FALSE, FALSE) return(list(attr(g,"N.groups"), g, NULL)) } if(inherits(g, "GRP")) return(g) g <- .Call(C_group, g, FALSE, FALSE) return(list(attr(g,"N.groups"), g, NULL)) } # Replaced by G_guo... # at2GRP <- function(x) { # if(is.nmfactor(x)) return(list(length(attr(x, "levels")), x, NULL)) # res <- list(NULL, NULL, NULL) # res[[2L]] <- qG(x, sort = FALSE, na.exclude = FALSE) # res[[1L]] <- attr(res[[2L]], "N.groups") # res # } G_t <- function(x) { # , wm = 1L if(is.null(x)) return(x) # { # if(wm > 0L) message(switch(wm, "Panel-lag computed without timevar: Assuming ordered data", # "Panel-difference computed without timevar: Assuming ordered data", # "Panel-growth rate computed without timevar: Assuming ordered data")) # return(x) # } # If integer time variable contains NA, does not break C++ code.. if(is.atomic(x)) { if(is.integer(unclass(x))) return(x) if(is.double(x) && !is.object(x)) return(as.integer(x)) return(qG(x, na.exclude = FALSE, sort = TRUE, method = "hash")) # make sure it is sorted ! qG already checks factor ! } # if(is_GRP(x)) return(x[[2L]]) # Not necessary because GRP.default also returns it.. return(GRP.default(x, return.groups = FALSE, sort = TRUE, call = FALSE)[[2L]]) } # Not currently used !! # G_t2 <- function(x) { # if(is.atomic(x)) if(is.integer(unclass(x))) return(x) else return(qG(x, sort = TRUE, na.exclude = FALSE, method = "hash")) # Hashing seems generally faster for time-variables !! # if(is_GRP(x)) return(x[[2L]]) else return(GRP.default(x, return.groups = FALSE, sort = TRUE, call = FALSE)[[2L]]) # } rgrep <- function(exp, nam, ..., sort = TRUE) if(length(exp) == 1L) grep(exp, nam, ...) else .Call(Cpp_funique, unlist(lapply(exp, grep, nam, ...), use.names = FALSE), sort) rgrepl <- function(exp, nam, ...) if(length(exp) == 1L) grepl(exp, nam, ...) else Reduce(`|`, lapply(exp, grepl, nam, ...)) fanyDuplicated <- function(x) if(length(x) < 100L) anyDuplicated.default(x) > 0L else .Call(Cpp_fndistinct,x,0L,0L,NULL,FALSE) != length(x) # NROW2 <- function(x, d) if(length(d)) d[1L] else length(x) # NCOL2 <- function(d, ilv) if(ilv) d[2L] else 1L charorNULL <- function(x) if(is.character(x)) x else NULL tochar <- function(x) if(is.character(x)) x else as.character(x) # if(is.object(x)) as.character(x) else .Call(C_aschar, x) # more security here? # unique_factor <- function(x) { # Still needed with new collap solution ? -> Nope ! # res <- seq_along(attr(x, "levels")) # .Call(C_duplAttributes, res, x) # } # dotstostr <- function(...) { # args <- deparse(substitute(c(...))) # nc <- nchar(args) # substr(args, 2, nc) # 3, nc-1 for no brackets ! # } unused_arg_action <- function(call, ...) { wo <- switch(getOption("collapse_unused_arg_action"), none = 0L, message = 1L, warning = 2L, error = 3L, stop("Unused argument encountered. Please instruct collapse what to do about unused arguments by setting options(collapse_unused_arg_action = 'warning'), or 'error', or 'message' or 'none'.")) if(wo != 0L) { args <- deparse(substitute(c(...))) nc <- nchar(args) args <- substr(args, 2, nc) # 3, nc-1 for no brackets ! msg <- paste("Unused argument", args, "passed to", as.character(call[[1L]])) switch(wo, message(msg), warning(msg), stop(msg)) } } is.nmfactor <- function(x) inherits(x, "factor") && (inherits(x, "na.included") || !anyNA(unclass(x))) addNA2 <- function(x) { if(!anyNA(unclass(x))) return(x) clx <- oldClass(x) oldClass(x) <- NULL if(!anyNA(lev <- attr(x, "levels"))) { attr(x, "levels") <- c(lev, NA_character_) x[is.na(x)] <- length(lev) + 1L } else x[is.na(x)] <- length(lev) `oldClass<-`(x, clx) } # addNA2 <- function(x) { # clx <- c(class(x), "na.included") # if(!anyNA(unclass(x))) return(`oldClass<-`(x, clx)) # ll <- attr(x, "levels") # if(!anyNA(ll)) ll <- c(ll, NA) # return(`oldClass<-`(factor(x, levels = ll, exclude = NULL), clx)) # } l1orn <- function(x, nam) if(length(x) == 1L) x else nam l1orlst <- function(x) if(length(x) == 1L) x else x[length(x)] fsimplify2array <- function(l) { res <- do.call(cbind, l) # lapply(l, `dimnames<-`, NULL) # also faster than unlist.. dim(res) <- c(dim(l[[1L]]), length(l)) dimnames(res) <- c(if(length(dn <- dimnames(l[[1L]]))) dn else list(NULL, NULL), list(names(l))) res } # fss <- function(x, i, j) { # rn <- attr(x, "row.names") # if(is.numeric(rn) || is.null(rn) || rn[1L] == "1") return(.Call(C_subsetDT, x, i, j)) # return(`attr<-`(.Call(C_subsetDT, x, i, j), "row.names", rn[r])) # } # fsplit_DF <- function(x, j, f, rnl, ...) { # j <- seq_along(unclass(x)) # rn <- attr(x, "row.names") # if(is.numeric(rn) || is.null(rn) || rn[1L] == "1") # return(lapply(split.default(seq_along(.subset2(x, 1L)), f, ...), # function(i) .Call(C_subsetDT, x, i, j))) # lapply(split.default(seq_along(.subset2(x, 1L)), f, ...), # function(i) `attr<-`(.Call(C_subsetDT, x, i, j), "row.names", rn[i])) # } collapse/R/fvar_fsd.R0000644000176200001440000003362214172367040014204 0ustar liggesusers # TODO: w.type - Implement reliability weights? # Note: for principal innovations of this code see fsum.R fsd <- function(x, ...) UseMethod("fsd") # , x fsd.default <- function(x, g = NULL, w = NULL, TRA = NULL, na.rm = TRUE, use.g.names = TRUE, stable.algo = TRUE, ...) { if(is.matrix(x) && !inherits(x, "matrix")) return(fsd.matrix(x, g, w, TRA, na.rm, use.g.names, stable.algo = stable.algo, ...)) if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(TRA)) { if(is.null(g)) return(.Call(Cpp_fvarsd,x,0L,0L,NULL,w,na.rm,stable.algo,TRUE)) if(is.atomic(g)) { if(use.g.names) { if(!is.nmfactor(g)) g <- qF(g, na.exclude = FALSE) lev <- attr(g, "levels") return(`names<-`(.Call(Cpp_fvarsd,x,length(lev),g,NULL,w,na.rm,stable.algo,TRUE), lev)) } if(is.nmfactor(g)) return(.Call(Cpp_fvarsd,x,fnlevels(g),g,NULL,w,na.rm,stable.algo,TRUE)) g <- qG(g, na.exclude = FALSE) return(.Call(Cpp_fvarsd,x,attr(g,"N.groups"),g,NULL,w,na.rm,stable.algo,TRUE)) } if(!is_GRP(g)) g <- GRP.default(g, return.groups = use.g.names, call = FALSE) if(use.g.names) return(`names<-`(.Call(Cpp_fvarsd,x,g[[1L]],g[[2L]],g[[3L]],w,na.rm,stable.algo,TRUE), GRPnames(g))) return(.Call(Cpp_fvarsd,x,g[[1L]],g[[2L]],g[[3L]],w,na.rm,stable.algo,TRUE)) } if(is.null(g)) return(.Call(Cpp_TRA,x,.Call(Cpp_fvarsd,x,0L,0L,NULL,w,na.rm,stable.algo,TRUE),0L,TtI(TRA))) g <- G_guo(g) .Call(Cpp_TRA,x,.Call(Cpp_fvarsd,x,g[[1L]],g[[2L]],g[[3L]],w,na.rm,stable.algo,TRUE),g[[2L]],TtI(TRA)) } fsd.matrix <- function(x, g = NULL, w = NULL, TRA = NULL, na.rm = TRUE, use.g.names = TRUE, drop = TRUE, stable.algo = TRUE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(TRA)) { if(is.null(g)) return(.Call(Cpp_fvarsdm,x,0L,0L,NULL,w,na.rm,stable.algo,TRUE,drop)) if(is.atomic(g)) { if(use.g.names) { if(!is.nmfactor(g)) g <- qF(g, na.exclude = FALSE) lev <- attr(g, "levels") return(`dimnames<-`(.Call(Cpp_fvarsdm,x,length(lev),g,NULL,w,na.rm,stable.algo,TRUE,FALSE), list(lev, dimnames(x)[[2L]]))) } if(is.nmfactor(g)) return(.Call(Cpp_fvarsdm,x,fnlevels(g),g,NULL,w,na.rm,stable.algo,TRUE,FALSE)) g <- qG(g, na.exclude = FALSE) return(.Call(Cpp_fvarsdm,x,attr(g,"N.groups"),g,NULL,w,na.rm,stable.algo,TRUE,FALSE)) } if(!is_GRP(g)) g <- GRP.default(g, return.groups = use.g.names, call = FALSE) if(use.g.names) return(`dimnames<-`(.Call(Cpp_fvarsdm,x,g[[1L]],g[[2L]],g[[3L]],w,na.rm,stable.algo,TRUE,FALSE), list(GRPnames(g), dimnames(x)[[2L]]))) return(.Call(Cpp_fvarsdm,x,g[[1L]],g[[2L]],g[[3L]],w,na.rm,stable.algo,TRUE,FALSE)) } if(is.null(g)) return(.Call(Cpp_TRAm,x,.Call(Cpp_fvarsdm,x,0L,0L,NULL,w,na.rm,stable.algo,TRUE,TRUE),0L,TtI(TRA))) g <- G_guo(g) .Call(Cpp_TRAm,x,.Call(Cpp_fvarsdm,x,g[[1L]],g[[2L]],g[[3L]],w,na.rm,stable.algo,TRUE,FALSE),g[[2L]],TtI(TRA)) } fsd.data.frame <- function(x, g = NULL, w = NULL, TRA = NULL, na.rm = TRUE, use.g.names = TRUE, drop = TRUE, stable.algo = TRUE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(TRA)) { if(is.null(g)) return(.Call(Cpp_fvarsdl,x,0L,0L,NULL,w,na.rm,stable.algo,TRUE,drop)) if(is.atomic(g)) { if(use.g.names && !inherits(x, "data.table")) { if(!is.nmfactor(g)) g <- qF(g, na.exclude = FALSE) lev <- attr(g, "levels") return(setRnDF(.Call(Cpp_fvarsdl,x,length(lev),g,NULL,w,na.rm,stable.algo,TRUE,FALSE), lev)) } if(is.nmfactor(g)) return(.Call(Cpp_fvarsdl,x,fnlevels(g),g,NULL,w,na.rm,stable.algo,TRUE,FALSE)) g <- qG(g, na.exclude = FALSE) return(.Call(Cpp_fvarsdl,x,attr(g,"N.groups"),g,NULL,w,na.rm,stable.algo,TRUE,FALSE)) } if(!is_GRP(g)) g <- GRP.default(g, return.groups = use.g.names, call = FALSE) if(use.g.names && !inherits(x, "data.table") && length(groups <- GRPnames(g))) return(setRnDF(.Call(Cpp_fvarsdl,x,g[[1L]],g[[2L]],g[[3L]],w,na.rm,stable.algo,TRUE,FALSE), groups)) return(.Call(Cpp_fvarsdl,x,g[[1L]],g[[2L]],g[[3L]],w,na.rm,stable.algo,TRUE,FALSE)) } if(is.null(g)) return(.Call(Cpp_TRAl,x,.Call(Cpp_fvarsdl,x,0L,0L,NULL,w,na.rm,stable.algo,TRUE,TRUE),0L,TtI(TRA))) g <- G_guo(g) .Call(Cpp_TRAl,x,.Call(Cpp_fvarsdl,x,g[[1L]],g[[2L]],g[[3L]],w,na.rm,stable.algo,TRUE,FALSE),g[[2L]],TtI(TRA)) } fsd.list <- function(x, g = NULL, w = NULL, TRA = NULL, na.rm = TRUE, use.g.names = TRUE, drop = TRUE, stable.algo = TRUE, ...) fsd.data.frame(x, g, w, TRA, na.rm, use.g.names, drop, stable.algo, ...) fsd.grouped_df <- function(x, w = NULL, TRA = NULL, na.rm = TRUE, use.g.names = FALSE, keep.group_vars = TRUE, keep.w = TRUE, stable.algo = TRUE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) g <- GRP.grouped_df(x, call = FALSE) wsym <- l1orn(as.character(substitute(w)), NULL) nam <- attr(x, "names") gn2 <- gn <- which(nam %in% g[[5L]]) nTRAl <- is.null(TRA) sumw <- NULL if(length(wsym) && length(wn <- whichv(nam, wsym))) { w <- .subset2(x, wn) if(any(gn == wn)) stop("Weights coincide with grouping variables!") gn <- c(gn, wn) if(keep.w) { if(nTRAl) sumw <- `names<-`(list(fsumC(w,g[[1L]],g[[2L]],NULL,na.rm)), paste0("sum.", wsym)) else if(keep.group_vars) gn2 <- gn else sumw <- gn2 <- wn } } gl <- length(gn) > 0L if(gl || nTRAl) { ax <- attributes(x) attributes(x) <- NULL if(nTRAl) { ax[["groups"]] <- NULL ax[["class"]] <- fsetdiff(ax[["class"]], c("GRP_df", "grouped_df")) ax[["row.names"]] <- if(use.g.names) GRPnames(g) else .set_row_names(g[[1L]]) if(gl) { if(keep.group_vars) { ax[["names"]] <- c(g[[5L]], names(sumw), nam[-gn]) return(setAttributes(c(g[[4L]], sumw, .Call(Cpp_fvarsdl,x[-gn],g[[1L]],g[[2L]],g[[3L]],w,na.rm,stable.algo,TRUE,FALSE)), ax)) } ax[["names"]] <- c(names(sumw), nam[-gn]) return(setAttributes(c(sumw, .Call(Cpp_fvarsdl,x[-gn],g[[1L]],g[[2L]],g[[3L]],w,na.rm,stable.algo,TRUE,FALSE)), ax)) } else if(keep.group_vars) { ax[["names"]] <- c(g[[5L]], nam) return(setAttributes(c(g[[4L]], .Call(Cpp_fvarsdl,x,g[[1L]],g[[2L]],g[[3L]],w,na.rm,stable.algo,TRUE,FALSE)), ax)) } else return(setAttributes(.Call(Cpp_fvarsdl,x,g[[1L]],g[[2L]],g[[3L]],w,na.rm,stable.algo,TRUE,FALSE), ax)) } else if(keep.group_vars || (keep.w && length(sumw))) { ax[["names"]] <- c(nam[gn2], nam[-gn]) return(setAttributes(c(x[gn2],.Call(Cpp_TRAl,x[-gn],.Call(Cpp_fvarsdl,x[-gn],g[[1L]],g[[2L]],g[[3L]],w,na.rm,stable.algo,TRUE,FALSE),g[[2L]],TtI(TRA))), ax)) } ax[["names"]] <- nam[-gn] return(setAttributes(.Call(Cpp_TRAl,x[-gn],.Call(Cpp_fvarsdl,x[-gn],g[[1L]],g[[2L]],g[[3L]],w,na.rm,stable.algo,TRUE,FALSE),g[[2L]],TtI(TRA)), ax)) } else return(.Call(Cpp_TRAl,x,.Call(Cpp_fvarsdl,x,g[[1L]],g[[2L]],g[[3L]],w,na.rm,stable.algo,TRUE,FALSE),g[[2L]],TtI(TRA))) } fvar <- function(x, ...) UseMethod("fvar") # , x fvar.default <- function(x, g = NULL, w = NULL, TRA = NULL, na.rm = TRUE, use.g.names = TRUE, stable.algo = TRUE, ...) { if(is.matrix(x) && !inherits(x, "matrix")) return(fvar.matrix(x, g, w, TRA, na.rm, use.g.names, stable.algo = stable.algo, ...)) if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(TRA)) { if(is.null(g)) return(.Call(Cpp_fvarsd,x,0L,0L,NULL,w,na.rm,stable.algo,FALSE)) if(is.atomic(g)) { if(use.g.names) { if(!is.nmfactor(g)) g <- qF(g, na.exclude = FALSE) lev <- attr(g, "levels") return(`names<-`(.Call(Cpp_fvarsd,x,length(lev),g,NULL,w,na.rm,stable.algo,FALSE), lev)) } if(is.nmfactor(g)) return(.Call(Cpp_fvarsd,x,fnlevels(g),g,NULL,w,na.rm,stable.algo,FALSE)) g <- qG(g, na.exclude = FALSE) return(.Call(Cpp_fvarsd,x,attr(g,"N.groups"),g,NULL,w,na.rm,stable.algo,FALSE)) } if(!is_GRP(g)) g <- GRP.default(g, return.groups = use.g.names, call = FALSE) if(use.g.names) return(`names<-`(.Call(Cpp_fvarsd,x,g[[1L]],g[[2L]],g[[3L]],w,na.rm,stable.algo,FALSE), GRPnames(g))) return(.Call(Cpp_fvarsd,x,g[[1L]],g[[2L]],g[[3L]],w,na.rm,stable.algo,FALSE)) } if(is.null(g)) return(.Call(Cpp_TRA,x,.Call(Cpp_fvarsd,x,0L,0L,NULL,w,na.rm,stable.algo,FALSE),0L,TtI(TRA))) g <- G_guo(g) .Call(Cpp_TRA,x,.Call(Cpp_fvarsd,x,g[[1L]],g[[2L]],g[[3L]],w,na.rm,stable.algo,FALSE),g[[2L]],TtI(TRA)) } fvar.matrix <- function(x, g = NULL, w = NULL, TRA = NULL, na.rm = TRUE, use.g.names = TRUE, drop = TRUE, stable.algo = TRUE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(TRA)) { if(is.null(g)) return(.Call(Cpp_fvarsdm,x,0L,0L,NULL,w,na.rm,stable.algo,FALSE,drop)) if(is.atomic(g)) { if(use.g.names) { if(!is.nmfactor(g)) g <- qF(g, na.exclude = FALSE) lev <- attr(g, "levels") return(`dimnames<-`(.Call(Cpp_fvarsdm,x,length(lev),g,NULL,w,na.rm,stable.algo,FALSE,FALSE), list(lev, dimnames(x)[[2L]]))) } if(is.nmfactor(g)) return(.Call(Cpp_fvarsdm,x,fnlevels(g),g,NULL,w,na.rm,stable.algo,FALSE,FALSE)) g <- qG(g, na.exclude = FALSE) return(.Call(Cpp_fvarsdm,x,attr(g,"N.groups"),g,NULL,w,na.rm,stable.algo,FALSE,FALSE)) } if(!is_GRP(g)) g <- GRP.default(g, return.groups = use.g.names, call = FALSE) if(use.g.names) return(`dimnames<-`(.Call(Cpp_fvarsdm,x,g[[1L]],g[[2L]],g[[3L]],w,na.rm,stable.algo,FALSE,FALSE), list(GRPnames(g), dimnames(x)[[2L]]))) return(.Call(Cpp_fvarsdm,x,g[[1L]],g[[2L]],g[[3L]],w,na.rm,stable.algo,FALSE,FALSE)) } if(is.null(g)) return(.Call(Cpp_TRAm,x,.Call(Cpp_fvarsdm,x,0L,0L,NULL,w,na.rm,stable.algo,FALSE,TRUE),0L,TtI(TRA))) g <- G_guo(g) .Call(Cpp_TRAm,x,.Call(Cpp_fvarsdm,x,g[[1L]],g[[2L]],g[[3L]],w,na.rm,stable.algo,FALSE,FALSE),g[[2L]],TtI(TRA)) } fvar.data.frame <- function(x, g = NULL, w = NULL, TRA = NULL, na.rm = TRUE, use.g.names = TRUE, drop = TRUE, stable.algo = TRUE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(TRA)) { if(is.null(g)) return(.Call(Cpp_fvarsdl,x,0L,0L,NULL,w,na.rm,stable.algo,FALSE,drop)) if(is.atomic(g)) { if(use.g.names && !inherits(x, "data.table")) { if(!is.nmfactor(g)) g <- qF(g, na.exclude = FALSE) lev <- attr(g, "levels") return(setRnDF(.Call(Cpp_fvarsdl,x,length(lev),g,NULL,w,na.rm,stable.algo,FALSE,FALSE), lev)) } if(is.nmfactor(g)) return(.Call(Cpp_fvarsdl,x,fnlevels(g),g,NULL,w,na.rm,stable.algo,FALSE,FALSE)) g <- qG(g, na.exclude = FALSE) return(.Call(Cpp_fvarsdl,x,attr(g,"N.groups"),g,NULL,w,na.rm,stable.algo,FALSE,FALSE)) } if(!is_GRP(g)) g <- GRP.default(g, return.groups = use.g.names, call = FALSE) if(use.g.names && !inherits(x, "data.table") && length(groups <- GRPnames(g))) return(setRnDF(.Call(Cpp_fvarsdl,x,g[[1L]],g[[2L]],g[[3L]],w,na.rm,stable.algo,FALSE,FALSE), groups)) return(.Call(Cpp_fvarsdl,x,g[[1L]],g[[2L]],g[[3L]],w,na.rm,stable.algo,FALSE,FALSE)) } if(is.null(g)) return(.Call(Cpp_TRAl,x,.Call(Cpp_fvarsdl,x,0L,0L,NULL,w,na.rm,stable.algo,FALSE,TRUE),0L,TtI(TRA))) g <- G_guo(g) .Call(Cpp_TRAl,x,.Call(Cpp_fvarsdl,x,g[[1L]],g[[2L]],g[[3L]],w,na.rm,stable.algo,FALSE,FALSE),g[[2L]],TtI(TRA)) } fvar.list <- function(x, g = NULL, w = NULL, TRA = NULL, na.rm = TRUE, use.g.names = TRUE, drop = TRUE, stable.algo = TRUE, ...) fvar.data.frame(x, g, w, TRA, na.rm, use.g.names, drop, stable.algo, ...) fvar.grouped_df <- function(x, w = NULL, TRA = NULL, na.rm = TRUE, use.g.names = FALSE, keep.group_vars = TRUE, keep.w = TRUE, stable.algo = TRUE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) g <- GRP.grouped_df(x, call = FALSE) wsym <- l1orn(as.character(substitute(w)), NULL) nam <- attr(x, "names") gn2 <- gn <- which(nam %in% g[[5L]]) nTRAl <- is.null(TRA) sumw <- NULL if(length(wsym) && length(wn <- whichv(nam, wsym))) { w <- .subset2(x, wn) if(any(gn == wn)) stop("Weights coincide with grouping variables!") gn <- c(gn, wn) if(keep.w) { if(nTRAl) sumw <- `names<-`(list(fsumC(w,g[[1L]],g[[2L]],NULL,na.rm)), paste0("sum.", wsym)) else if(keep.group_vars) gn2 <- gn else sumw <- gn2 <- wn } } gl <- length(gn) > 0L if(gl || nTRAl) { ax <- attributes(x) attributes(x) <- NULL if(nTRAl) { ax[["groups"]] <- NULL ax[["class"]] <- fsetdiff(ax[["class"]], c("GRP_df", "grouped_df")) ax[["row.names"]] <- if(use.g.names) GRPnames(g) else .set_row_names(g[[1L]]) if(gl) { if(keep.group_vars) { ax[["names"]] <- c(g[[5L]], names(sumw), nam[-gn]) return(setAttributes(c(g[[4L]], sumw, .Call(Cpp_fvarsdl,x[-gn],g[[1L]],g[[2L]],g[[3L]],w,na.rm,stable.algo,FALSE,FALSE)), ax)) } ax[["names"]] <- c(names(sumw), nam[-gn]) return(setAttributes(c(sumw, .Call(Cpp_fvarsdl,x[-gn],g[[1L]],g[[2L]],g[[3L]],w,na.rm,stable.algo,FALSE,FALSE)), ax)) } else if(keep.group_vars) { ax[["names"]] <- c(g[[5L]], nam) return(setAttributes(c(g[[4L]], .Call(Cpp_fvarsdl,x,g[[1L]],g[[2L]],g[[3L]],w,na.rm,stable.algo,FALSE,FALSE)), ax)) } else return(setAttributes(.Call(Cpp_fvarsdl,x,g[[1L]],g[[2L]],g[[3L]],w,na.rm,stable.algo,FALSE,FALSE), ax)) } else if(keep.group_vars || (keep.w && length(sumw))) { ax[["names"]] <- c(nam[gn2], nam[-gn]) return(setAttributes(c(x[gn2],.Call(Cpp_TRAl,x[-gn],.Call(Cpp_fvarsdl,x[-gn],g[[1L]],g[[2L]],g[[3L]],w,na.rm,stable.algo,FALSE,FALSE),g[[2L]],TtI(TRA))), ax)) } ax[["names"]] <- nam[-gn] return(setAttributes(.Call(Cpp_TRAl,x[-gn],.Call(Cpp_fvarsdl,x[-gn],g[[1L]],g[[2L]],g[[3L]],w,na.rm,stable.algo,FALSE,FALSE),g[[2L]],TtI(TRA)), ax)) } else return(.Call(Cpp_TRAl,x,.Call(Cpp_fvarsdl,x,g[[1L]],g[[2L]],g[[3L]],w,na.rm,stable.algo,FALSE,FALSE),g[[2L]],TtI(TRA))) } collapse/R/global_macros.R0000644000176200001440000001402614170157057015216 0ustar liggesusers # .NA_RM <- TRUE # global macros .COLLAPSE_TOPICS <- c("collapse-documentation","fast-statistical-functions","fast-grouping-ordering", "fast-data-manipulation","quick-conversion","advanced-aggregation", "data-transformations","time-series-panel-series","list-processing", "summary-statistics","recode-replace","efficient-programming","small-helpers") # .COLLAPSE_TOPICS <- c("collapse-documentation","A1-fast-statistical-functions","A2-fast-grouping-ordering", # "A3-fast-data-manipulation","A4-quick-conversion","A5-advanced-aggregation", # "A6-data-transformations","A7-time-series-panel-series","A8-list-processing", # "A9-summary-statistics","AA1-recode-replace","AA2-efficient-programming","AA3-small-helpers") # rd <- tools::Rd_db("collapse") # .COLLAPSE_HELP <- unlist(lapply(rd, tools:::.Rd_get_metadata, "name"), use.names = FALSE) # grep("^A|depreciated", unlist(lapply(rd, tools:::.Rd_get_metadata, "name"), use.names = FALSE), invert = TRUE, value = TRUE) # Get updated .COLLAPSE_ALL: # ".default$|.matrix$|.data.frame$" # v <- grep("\\.|N|HD", objects("package:collapse"), invert = TRUE, value = TRUE) # getNamespaceExports("collapse") # v <- c(v, "HDB", "HDW", "allNA", "whichNA") # cat(v, sep = '", "') # all package objects.. # allobj <- ls(getNamespace("collapse"), all.names=TRUE) .COLLAPSE_ALL <- sort(unique(c("%-=%", "%!=%", "%!in%", "%*=%", "%/=%", "%+=%", "%=%", "%==%", "%c-%", "%c*%", "%c/%", "%c+%", "%cr%", "%r-%", "%r*%", "%r/%", "%r+%", "%rr%", "add_stub", "add_vars", "add_vars<-", "all_identical", "all_obj_equal", "alloc", "allv", "anyv", "as_character_factor", "as_factor_GRP", "as_factor_qG", "as_numeric_factor", "atomic_elem", "atomic_elem<-", "av", "av<-", "B", "BY", "cat_vars", "cat_vars<-", "char_vars", "char_vars<-", "cinv", "ckmatch", "collap", "collapg", "collapv", "colorder", "colorderv", "copyAttrib", "copyMostAttrib", "copyv", "D", "dapply", "date_vars", "Date_vars", "date_vars<-", "Date_vars<-", "descr", "Dlog", "F", "fact_vars", "fact_vars<-", "fbetween", "fcompute", "fcomputev", "fcumsum", "fdiff", "fdim", "fdroplevels", "ffirst", "fFtest", "fgroup_by", "fgroup_vars", "fgrowth", "fhdbetween", "fhdwithin", "finteraction", "flag", "flast", "flm", "fmax", "fmean", "fmedian", "fmin", "fmode", "fmutate", "fncol", "fndistinct", "fnlevels", "fnobs", "fnrow", "fnth", "fprod", "frename", "fscale", "fsd", "fselect", "fselect<-", "fsubset", "fsum", "fsummarise", "ftransform", "ftransform<-", "ftransformv", "fungroup", "funique", "fvar", "fwithin", "G", "gby", "get_elem", "get_vars", "get_vars<-", "GGDC10S", "group", "groupid", "GRP", "GRPnames", "gsplit", "gv", "gv<-", "gvr", "gvr<-", "has_elem", "irreg_elem", "is_categorical", "is_date", "is_GRP", "is_qG", "is_unlistable", "L", "ldepth", "list_elem", "list_elem<-", "logi_vars", "logi_vars<-", "massign", "mctl", "missing_cases", "mrtl", "mtt", "na_insert", "na_omit", "na_rm", "namlab", "num_vars", "num_vars<-", "nv", "nv<-", "pad", "psacf", "psccf", "psmat", "pspacf", "pwcor", "pwcov", "pwnobs", "qDF", "qDT", "qF", "qG", "qM", "qsu", "qTBL", "radixorder", "radixorderv", "rapply2d", "Recode", "recode_char", "recode_num", "reg_elem", "relabel", "replace_Inf", "replace_non_finite", "replace_outliers", "rm_stub", "rnm", "roworder", "roworderv", "rsplit", "sbt", "seq_col", "seq_row", "seqid", "setAttrib", "setColnames", "setDimnames", "setLabels", "setop", "setrelabel", "setrename", "setRownames", "settfm", "settfmv", "settransform", "settransformv", "setv", "slt", "slt<-", "smr", "ss", "STD", "t_list", "tfm", "tfm<-", "tfmv", "TRA", "unattrib", "unlist2d", "varying", "vclasses", "vlabels", "vlabels<-", "vlengths", "vtypes", "W", "whichv", "wlddev", "HDB", "HDW", "allNA", "whichNA"))) .COLLAPSE_GENERIC <- sort(unique(c("B","BY","D","Dlog","F","fsubset","fbetween","fdiff","ffirst","fgrowth","fhdbetween", "fhdwithin","flag","flast","fmax","fmean","fmedian","fnth","fmin","fmode","varying", "fndistinct","fnobs","fprod","fscale","fsd","fsum","fcumsum","fvar","fwithin","funique", "G","GRP","HDB","HDW","L","psacf","psccf","psmat","pspacf","qsu", "rsplit","fdroplevels", "STD","TRA","W"))) .COLLAPSE_DATA <- c("GGDC10S", "wlddev") .FAST_FUN <- c("fmean","fmedian","fmode","fsum","fprod","fsd","fvar", "fmin","fmax","fnth","ffirst","flast","fnobs","fndistinct", "fcumsum","fscale","fbetween","fwithin","fhdbetween","fhdwithin", "flag","fdiff","fgrowth") .FAST_STAT_FUN <- c("fmean","fmedian","fmode","fsum","fprod","fsd","fvar", "fmin","fmax","fnth","ffirst","flast","fnobs","fndistinct") .OPERATOR_FUN <- c("STD","B","W","HDB","HDW","L","F","D","Dlog","G") .FAST_STAT_FUN_POLD <- c(.FAST_STAT_FUN, "fNobs","fNdistinct") .FAST_FUN_MOPS <- c(.FAST_STAT_FUN_POLD, "fcumsum","fscale","fbetween","fwithin", "flag","fdiff","fgrowth","STD","B","W","L","F","D","Dlog","G") .FAST_STAT_FUN_EXT <- c(.FAST_STAT_FUN_POLD, paste0(.FAST_STAT_FUN_POLD, "_uw")) collapse/R/recode_replace.R0000644000176200001440000006036014174223734015350 0ustar liggesusers# Note: don't change the order of these arguments !!! scv <- function(x, v, r, set = FALSE, inv = FALSE) .Call(C_setcopyv, x, v, r, inv, set, FALSE) # inspired by ?dplyr::recode # Think about adopting this code for as_numeric_factor and as_character_factor recode_num <- function(X, ..., default = NULL, missing = NULL, set = FALSE) { if(missing(...)) stop("recode_num requires arguments of the form: value = replacement") args <- list(...) nam <- as.numeric(names(args)) # nzchar(names(args)) ... check non-empty names ? -> nah, this package is not for dummies if(anyNA(nam)) stop(paste("Non-numeric arguments:", paste(names(args)[is.na(nam)], collapse = ", "))) arglen <- length(args) missingl <- !is.null(missing) if(missingl && any(nam == missing)) warning(paste0("To improve performance missing values are replaced prior to recode, so this replaces all missing values with ", missing, " and those are then again replaced with ", args[[which(nam == missing)]], ". If this is not desired, call replace_NA after recode with missing = NULL.")) if(arglen == 1L) { args <- args[[1L]] if(is.null(default)) { if(missingl) { repfun <- function(y) if(is.numeric(y)) { z <- scv(y, NA, missing, set) # y[is.na(y)] <- missing scv(z, nam, args, TRUE) # `[<-`(y, y == nam, value = args) } else y } else { repfun <- function(y) if(is.numeric(y)) scv(y, nam, args, set) else y # `[<-`(y, y == nam, value = args) } } else { nr <- if(is.atomic(X)) NROW(X) else fnrow2(X) if(missingl) { repfun <- function(y) if(is.numeric(y)) { z <- scv(y, NA, default, set, TRUE) # duplAttributes(alloc(default, nr), y) scv(z, NA, missing, TRUE) # z[is.na(y)] <- missing # could put behind -> better but inconsistent `[<-`(z, whichv(y, nam), value = args) # y == nam } else y } else { repfun <- function(y) if(is.numeric(y)) scv(scv(y, nam, default, set, TRUE), nam, args, TRUE) else y # `[<-`(duplAttributes(alloc(default, nr), y), y == nam, value = args) } } } else { seqarg <- seq_len(arglen) if(is.null(default)) { if(missingl) { repfun <- function(y) if(is.numeric(y)) { y <- scv(y, NA, missing, set) # y[is.na(y)] <- missing z <- y for(i in seqarg) z[whichv(y, nam[i])] <- args[[i]] z } else y } else { repfun <- function(y) if(is.numeric(y)) { z <- y for(i in seqarg) z[whichv(y, nam[i])] <- args[[i]] z } else y } } else { nr <- if(is.atomic(X)) NROW(X) else fnrow2(X) if(missingl) { repfun <- function(y) if(is.numeric(y)) { z <- scv(y, NA, default, set, TRUE) # duplAttributes(alloc(default, nr), y) scv(z, NA, missing, TRUE) # z[is.na(y)] <- missing # could put behind -> better but inconsistent for(i in seqarg) z[whichv(y, nam[i])] <- args[[i]] z } else y } else { repfun <- function(y) if(is.numeric(y)) { z <- scv(y, nam[1L], default, set, TRUE) # duplAttributes(alloc(default, nr), y) scv(z, nam[1L], args[[1L]], TRUE) for(i in seqarg[-1L]) z[whichv(y, nam[i])] <- args[[i]] z } else y } } } if(is.list(X)) { res <- duplAttributes(lapply(unattrib(X), repfun), X) return(if(inherits(X, "data.table")) alc(res) else res) } if(!is.numeric(X)) stop("X needs to be numeric or a list") repfun(X) } recode_char <- function(X, ..., default = NULL, missing = NULL, regex = FALSE, ignore.case = FALSE, fixed = FALSE, set = FALSE) { if(missing(...)) stop("recode_char requires arguments of the form: value = replacement") args <- list(...) nam <- names(args) arglen <- length(args) missingl <- !is.null(missing) if(missingl && any(nam == missing)) warning(paste0("To improve performance missing values are replaced prior to recode, so this replaces all missing values with ", missing, " and those are then again replaced with ", args[[which(nam == missing)]], ". If this is not desired, call replace_NA after recode with missing = NULL.")) if(regex) { if(arglen == 1L) { args <- args[[1L]] if(is.null(default)) { if(missingl) { repfun <- function(y) if(is.character(y)) { y <- scv(y, NA, missing, set) # y[is.na(y)] <- missing `[<-`(y, grepl(nam, y, ignore.case, FALSE, fixed), value = args) } else y } else { repfun <- function(y) if(is.character(y)) `[<-`(y, grepl(nam, y, ignore.case, FALSE, fixed), value = args) else y } } else { nr <- if(is.atomic(X)) NROW(X) else fnrow2(X) if(missingl) { repfun <- function(y) if(is.character(y)) { z <- scv(y, NA, default, set, TRUE) # duplAttributes(alloc(default, nr), y) scv(z, NA, missing, TRUE) # z[is.na(y)] <- missing # could put behind -> better but inconsistent `[<-`(z, grepl(nam, y, ignore.case, FALSE, fixed), value = args) } else y } else { repfun <- function(y) if(is.character(y)) `[<-`(duplAttributes(alloc(default, nr), y), grepl(nam, y, ignore.case, FALSE, fixed), value = args) else y } } } else { seqarg <- seq_len(arglen) if(is.null(default)) { if(missingl) { repfun <- function(y) if(is.character(y)) { y <- scv(y, NA, missing, set) # y[is.na(y)] <- missing z <- y for(i in seqarg) z[grepl(nam[i], y, ignore.case, FALSE, fixed)] <- args[[i]] z } else y } else { repfun <- function(y) if(is.character(y)) { z <- y for(i in seqarg) z[grepl(nam[i], y, ignore.case, FALSE, fixed)] <- args[[i]] z } else y } } else { nr <- if(is.atomic(X)) NROW(X) else fnrow2(X) if(missingl) { repfun <- function(y) if(is.character(y)) { z <- scv(y, NA, default, set, TRUE) # duplAttributes(alloc(default, nr), y) scv(z, NA, missing, TRUE) # z[is.na(y)] <- missing # could put behind -> better but inconsistent for(i in seqarg) z[grepl(nam[i], y, ignore.case, FALSE, fixed)] <- args[[i]] z } else y } else { repfun <- function(y) if(is.character(y)) { z <- duplAttributes(alloc(default, nr), y) for(i in seqarg) z[grepl(nam[i], y, ignore.case, FALSE, fixed)] <- args[[i]] z } else y } } } } else { if(arglen == 1L) { args <- args[[1L]] if(is.null(default)) { if(missingl) { repfun <- function(y) if(is.character(y)) { y <- scv(y, NA, missing, set) # y[is.na(y)] <- missing scv(y, nam, args, TRUE) # `[<-`(y, y == nam, value = args) } else y } else { repfun <- function(y) if(is.character(y)) scv(y, nam, args, set) else y # `[<-`(y, y == nam, value = args) } } else { nr <- if(is.atomic(X)) NROW(X) else fnrow2(X) if(missingl) { repfun <- function(y) if(is.character(y)) { z <- scv(y, NA, default, set, TRUE) # duplAttributes(alloc(default, nr), y) scv(z, NA, missing, TRUE) # z[is.na(y)] <- missing # could put behind -> better but inconsistent `[<-`(z, whichv(y, nam), value = args) } else y } else { repfun <- function(y) if(is.character(y)) scv(scv(y, nam, default, set, TRUE), nam, args, TRUE) else y # `[<-`(duplAttributes(alloc(default, nr), y), y == nam, value = args) } } } else { seqarg <- seq_len(arglen) if(is.null(default)) { if(missingl) { repfun <- function(y) if(is.character(y)) { y <- scv(y, NA, missing, set) # y[is.na(y)] <- missing z <- y for(i in seqarg) z[whichv(y, nam[i])] <- args[[i]] z } else y } else { repfun <- function(y) if(is.character(y)) { z <- y for(i in seqarg) z[whichv(y, nam[i])] <- args[[i]] z } else y } } else { nr <- if(is.atomic(X)) NROW(X) else fnrow2(X) if(missingl) { repfun <- function(y) if(is.character(y)) { z <- scv(y, NA, default, set, TRUE) # duplAttributes(alloc(default, nr), y) scv(z, NA, missing, TRUE) # z[is.na(y)] <- missing # could put behind -> better but inconsistent for(i in seqarg) z[whichv(y, nam[i])] <- args[[i]] z } else y } else { repfun <- function(y) if(is.character(y)) { z <- scv(y, nam[1L], default, set, TRUE) # duplAttributes(alloc(default, nr), y) scv(z, nam[1L], args[[1L]], TRUE) for(i in seqarg[-1L]) z[whichv(y, nam[i])] <- args[[i]] z } else y } } } } if(is.list(X)) { res <- duplAttributes(lapply(unattrib(X), repfun), X) return(if(inherits(X, "data.table")) alc(res) else res) } if(!is.character(X)) stop("X needs to be character or a list") repfun(X) } replace_NA <- function(X, value = 0L, cols = NULL, set = FALSE) { if(set) { if(is.list(X)) { if(is.null(cols)) { lapply(unattrib(X), scv, NA, value, TRUE) } else if(is.function(cols)) { lapply(unattrib(X), function(y) if(cols(y)) scv(y, NA, value, TRUE) else y) } else { cols <- cols2int(cols, X, attr(X, "names"), FALSE) lapply(unattrib(X)[cols], scv, NA, value, TRUE) } } else scv(X, NA, value, TRUE) # `[<-`(X, is.na(X), value = value) return(invisible(X)) } if(is.list(X)) { if(is.null(cols)) return(condalc(duplAttributes(lapply(unattrib(X), scv, NA, value), X), inherits(X, "data.table"))) # function(y) `[<-`(y, is.na(y), value = value) if(is.function(cols)) return(condalc(duplAttributes(lapply(unattrib(X), function(y) if(cols(y)) scv(y, NA, value) else y), X), inherits(X, "data.table"))) clx <- oldClass(X) oldClass(X) <- NULL cols <- cols2int(cols, X, names(X), FALSE) X[cols] <- lapply(unattrib(X[cols]), scv, NA, value) # function(y) `[<-`(y, is.na(y), value = value) return(condalc(`oldClass<-`(X, clx), any(clx == "data.table"))) } scv(X, NA, value) # `[<-`(X, is.na(X), value = value) } # Remove Inf (Infinity) and NaN (Not a number) from vectors or data frames: replace_Inf <- function(X, value = NA, replace.nan = FALSE) { if(is.list(X)) { # if(!inherits(X, "data.frame")) stop("replace_non_finite only works with atomic objects or data.frames") res <- duplAttributes(lapply(unattrib(X), if(replace.nan) (function(y) if(is.numeric(y)) `[<-`(y, is.infinite(y) | is.nan(y), value = value) else y) else (function(y) if(is.numeric(y)) `[<-`(y, is.infinite(y), value = value) else y)), X) return(if(inherits(X, "data.table")) alc(res) else res) } if(!is.numeric(X)) stop("Infinite values can only be replaced in numeric objects!") if(replace.nan) return(`[<-`(X, is.infinite(X) | is.nan(X), value = value)) # !is.finite(X) also replaces NA `[<-`(X, is.infinite(X), value = value) } replace_non_finite <- function(X, value = NA, replace.nan = TRUE) { .Deprecated("replace_Inf") replace_Inf(X, value, replace.nan) } replace_outliers <- function(X, limits, value = NA, single.limit = c("SDs", "min", "max", "overall_SDs")) { ll <- length(limits) if(lg1 <- ll > 1L) { if(ll > 2L) stop("length(limits) must be 1 or 2") l1 <- limits[1L] l2 <- limits[2L] } if(is.list(X)) { # if(!inherits(X, "data.frame")) stop("replace_outliers only works with atomic objects or data.frames") if(lg1) { res <- duplAttributes(lapply(unattrib(X), function(y) if(is.numeric(y)) `[<-`(y, y < l1 | y > l2, value = value) else y), X) # could use data.table::between -> but it seems not faster ! } else { res <- switch(single.limit[1L], # Allows grouped scaling if X is a grouped_df, but requires extra memory equal to X ... extra argument gSDs ? SDs = { if(inherits(X, c("grouped_df", "pdata.frame"))) { num <- .Call(C_vtypes, X, 1L) # vapply(unattrib(X), is.numeric, TRUE) num <- if(inherits(X, "grouped_df")) num & !fgroup_vars(X, "logical") else num & attr(getpix(attr(X, "index")), "names") %!in% attr(X, "names") clx <- oldClass(X) STDXnum <- fscale(fcolsubset(X, num)) oldClass(X) <- NULL X[num] <- mapply(function(z, y) `[<-`(z, abs(y) > limits, value = value), unattrib(X[num]), unattrib(STDXnum), SIMPLIFY = FALSE) `oldClass<-`(X, clx) } else duplAttributes(lapply(unattrib(X), function(y) if(is.numeric(y)) `[<-`(y, abs(fscaleCpp(y)) > limits, value = value) else y), X) }, min = duplAttributes(lapply(unattrib(X), function(y) if(is.numeric(y)) `[<-`(y, y < limits, value = value) else y), X), max = duplAttributes(lapply(unattrib(X), function(y) if(is.numeric(y)) `[<-`(y, y > limits, value = value) else y), X), overall_SDs = duplAttributes(lapply(unattrib(X), function(y) if(is.numeric(y)) `[<-`(y, abs(fscaleCpp(y)) > limits, value = value) else y), X), stop("Unknown single.limit option")) } return(if(inherits(res, "data.table")) alc(res) else res) } if(!is.numeric(X)) stop("Outliers can only be replaced in numeric objects!") if(lg1) return(`[<-`(X, X < l1 | X > l2, value = value)) switch(single.limit[1L], SDs =, overall_SDs = `[<-`(X, abs(fscale(X)) > limits, value = value), min = `[<-`(X, X < limits, value = value), max = `[<-`(X, X > limits, value = value), stop("Unknown single.limit option")) } # pad or fpad? x is vector, matrix or data.frame pad_atomic <- function(x, i, n, value) { ax <- attributes(x) tx <- typeof(x) if(typeof(value) != tx) value <- as.vector(value, tx) if(is.matrix(x)) { k <- dim(x)[2L] m <- .Call(C_alloc, value, n * k) # matrix(value, n, k) dim(m) <- c(n, k) m[i, ] <- x if(length(ax) == 1L) return(m) ax[["dim"]] <- c(n, k) # Could also pad row-names? perhaps with names of i ?? if(length(ax[["dimnames"]][[1L]])) ax[["dimnames"]] <- list(NULL, ax[["dimnames"]][[2L]]) if(is.object(x)) ax[["class"]] <- NULL return(`attributes<-`(m, ax)) # fastest ?? } r <- .Call(C_alloc, value, n) # matrix(value, n) # matrix is faster than rep_len !!!! r[i] <- x if(is.null(ax)) return(r) if(length(names(x))) { if(length(ax) == 1L) return(r) ax[["names"]] <- NULL } return(`attributes<-`(r, ax)) } # microbenchmark::microbenchmark(x[-i] <- ri, x[i2] <- ri) # Unit: milliseconds # expr min lq mean median uq max neval cld # x[-i] <- ri 255.16654 420.7083 491.7369 446.0340 476.3324 1290.7396 100 b # x[i2] <- ri 80.18755 136.8012 157.0027 146.8156 166.7158 311.5526 100 a # microbenchmark::microbenchmark(seq_along(x)[-i]) # Unit: milliseconds # expr min lq mean median uq max neval # seq_along(x)[-i] 506.0745 541.7975 605.0245 567.8115 585.8384 1341.035 100 pad <- function(X, i, value = NA, method = c("auto", "xpos", "vpos")) { # 1 - i is same length as X, fill missing, 2 - i is positive: insert missing values in positions ilog <- is.logical(i) ineg <- i[1L] < 0L n <- if(is.list(X)) length(.subset2(X, 1L)) else if(is.matrix(X)) dim(X)[1L] else length(X) xpos <- switch(method[1L], auto = if(ilog) bsum(i) == n else if(ineg) FALSE else length(i) == n, xpos = TRUE, vpos = FALSE, stop("Unknown method: ", method[1L])) n <- if(ilog) length(i) else if(xpos && !ineg) bmax(i) else n + length(i) if(is.atomic(X)) return(pad_atomic(X, if(xpos || ineg) i else if(ilog) !i else -i, n, value)) if(!is.list(X)) stop("X must be atomic or a list") if(ilog) { i <- if(xpos) which(i) else whichv(i, FALSE) } else if(!xpos) { i <- seq_len(n)[if(ineg) i else -i] } ax <- attributes(X) attributes(X) <- NULL res <- lapply(X, pad_atomic, i, n, value) if(length(ax[["row.names"]])) ax[["row.names"]] <- .set_row_names(n) return(condalcSA(res, ax, any(ax[["class"]] == "data.table"))) } # Something like this already exists?? -> should work with lists as well... # Previous version of Recode (Until collapse 1.1.0), Now depreciated in favor or recode_num and recode_char comp <- function(x, val) do.call(cbind, lapply(x, `==`, val)) comp_grepl <- function(x, val) do.call(cbind, lapply(x, function(y) grepl(val, y))) Recode <- function(X, ..., copy = FALSE, reserve.na.nan = TRUE, regex = FALSE) { .Deprecated("recode_num, recode_char") if(missing(...)) stop("Recode requires arguments of the form: value = replacement") if(is.list(X) && !inherits(X, "data.frame")) stop("Recode only works with atomic objects or data.frames") args <- list(...) nam <- names(args) if(reserve.na.nan && any(wm <- !is.na(ma <- match(c("NaN","NA"), nam)))) { # more efficient way to code this ?? if(wm[1L]) { # note: does not give multiple-matching error !! if(is.list(X)) { X[do.call(cbind, lapply(X, is.nan))] <- args[[ma[1L]]] } else X[is.nan(X)] <- args[[ma[1L]]] } if(wm[2L]) X[is.na(X)] <- args[[ma[2L]]] # is.na already accounts for NaN, so this needs to the the order!! if(bsum(wm) == length(args)) return(X) args <- args[-ma[wm]] nam <- names(args) } arglen <- length(args) onearg <- arglen == 1L if(regex) { if(is.list(X)) { if(onearg) { X[comp_grepl(X, nam)] <- args[[1L]] } else if(copy) { for (i in seq_along(X)) { Xi <- X[[i]] for (j in seq_len(arglen)) X[[i]][grepl(nam[j], Xi)] <- args[[j]] } } else for (j in seq_len(arglen)) X[comp_grepl(X, nam[j])] <- args[[j]] } else { if(onearg) X[grepl(nam, X)] <- args[[1L]] else if(copy) { Y <- X for (j in seq_len(arglen)) X[grepl(nam[j], Y)] <- args[[j]] } else for (j in seq_len(arglen)) X[grepl(nam[j], X)] <- args[[j]] } } else { if(is.list(X)) { oldopts <- options(warn = -1) # faster than suppressWarnings !! on.exit(options(oldopts)) numnam <- as.numeric(nam) # suppressWarnings(numnam <- as.numeric(nam)) -> this line takes most time !! if(onearg && !is.na(numnam)) nam <- numnam else { nam <- as.vector(nam, "list") if(any(numnaml <- !is.na(numnam))) nam[numnaml] <- numnam[numnaml] } if(onearg) X[comp(X, nam)] <- args[[1L]] else if(copy) { Y <- X for (j in seq_len(arglen)) X[comp(Y, nam[[j]])] <- args[[j]] } else for (j in seq_len(arglen)) X[comp(X, nam[[j]])] <- args[[j]] } else { if(!is.character(X)) nam <- as.numeric(nam) if(onearg) X[X == nam] <- args[[1L]] else if(copy) { Y <- X for (j in seq_len(arglen)) X[Y == nam[j]] <- args[[j]] } else for (j in seq_len(arglen)) X[X == nam[j]] <- args[[j]] } } return(X) } # Experimental: # recode_num2 is slightly slower than recode_num above .... (because 2 times apply) but almost identical.. # recode_num2 <- function(X, ...) { # , regex = FALSE # , default = NULL # if(missing(...)) stop("Recode requires arguments of the form: value = replacement") # args <- list(...) # nam <- as.numeric(names(args)) # if(anyNA(nam)) stop(paste("Non-numeric arguments:", paste(names(args)[is.na(nam)], collapse = ", "))) # arglen <- length(args) # # if(arglen == 1L) { # args <- args[[1L]] # repfun <- function(y) `[<-`(y, y == nam, value = args) # } else { # seqarg <- seq_len(arglen) # repfun <- function(y) { # z <- y # for(i in seqarg) z[y == nam[i]] <- args[[i]] # z # } # } # if(is.list(X)) { # num <- vapply(unattrib(X), is.numeric, TRUE) # clx <- class(X) # class(X) <- NULL # X[num] <- lapply(X[num], repfun) # return(`oldClass<-`(X, clx)) # } # if(!is.numeric(X)) stop("X needs to be numeric or a list") # return(repfun(X)) # } # # # # # possibly even faster by converting df to matrix and back for multiple comp ?? # Recode2 <- function(X, ...) { # , regex = FALSE # , default = NULL # if(missing(...)) stop("Recode requires arguments of the form: value = replacement") # args <- list(...) # nam <- names(args) # arglen <- length(args) # onearg <- arglen == 1L # # if(onearg) { # args <- args[[1L]] # repfun <- function(y) { # if(is.numeric(y)) { # v <- as.numeric(nam) # if(is.na(v)) { # warning("Trying to replace a non-numeric expressiom in a numeric column: Skipping this column") # return(y) # } # `[<-`(y, y == v, value = args) # } else `[<-`(y, y == nam, value = args) # } # } else { # seqarg <- seq_len(arglen) # repfun <- function(y) { # z <- y # if(is.numeric(y)) { # if(is.null(v)) { # v <<- as.numeric(nam) # if(anyNA(v)) { # warning("Trying to replace a non-numeric expressiom in a numeric column: Skipping those expressions") # v <- na_rm(v) # if(!length(v)) return(y) # } # } # for(i in seqarg) z[y == v[i]] <- args[[i]] # } else { # for(i in seqarg) z[y == nam[i]] <- args[[i]] # } # z # } # } # # if(is.list(X)) { # return(duplAttributes(lapply(unattrib(X), repfun), X)) # } # if(!is.character(X)) storage.mode(nam) <- storage.mode(X) # return(repfun(X)) # } # # # rec1 <- function(x, ...) { # args <- list(...) # nam <- as.numeric(names(args)) # ax <- attributes(x) # attributes(x) <- NULL # for(i in seq_along(args)) { # ni <- nam[i] # Faster! # argi <- args[[i]] # x <- lapply(x, function(y) `[<-`(y, y == ni, value = argi)) # } # setAttributes(x, ax) # } # # More memory efficient !! # rec2 <- function(x, ...) { # args <- list(...) # nam <- as.numeric(names(args)) # if(length(args) > 1L) repfun <- function(y) { # for(i in seq_along(args)) y[y == nam[i]] <- args[[i]] # y # } else repfun <- function(y) `[<-`(y, y == nam, value = args[[1L]]) # dapply(x, repfun) # } # # Even more memory efficient and faster !! # rec3 <- function(x, ...) { # args <- list(...) # nam <- as.numeric(names(args)) # if(length(args) > 1L) repfun <- function(y) { # z <- y # for(i in seq_along(args)) z[y == nam[i]] <- args[[i]] # z # } else repfun <- function(y) `[<-`(y, y == nam, value = args[[1L]]) # dapply(x, repfun) # } # # # tf <- function(x, ...) list(...) # # # profvis({ # # v <- c("a","b","c") # # Recode(v, a = "b") # # }) # Previous Versions (until collapse 1.1.0) # replace_outliers <- function(X, limits, value = NA, single.limit = c("SDs","Min","Max")) { # if(length(limits) > 1L) { # betw <- function(x, lim) x < lim[1L] | x > lim[2L] # if(is.list(X)) { # for (i in seq_along(X)) X[[i]][betw(X[[i]], limits)] <- value # } else X[betw(X, limits)] <- value # return(X) # } else { # switch(single.limit[1L], # SDs = { # if(is.list(X)) { # gsd <- function(x, lim) abs(x) > lim*fsdCpp(x) # for (i in seq_along(X)) X[[i]][gsd(X[[i]], limits)] <- value # } else X[abs(X) > lim*fsd(X)] <- value # return(X) # }, # Min = { # if(is.list(X)) { # for (i in seq_along(X)) X[[i]][X[[i]] < limits] <- value # } else X[X < limits] <- value # return(X) # }, # Max = { # if(is.list(X)) { # for (i in seq_along(X)) X[[i]][X[[i]] > limits] <- value # } else X[X > limits] <- value # return(X) # }, # stop("Unknown single.limit option")) # } # } collapse/R/fnobs.R0000644000176200001440000001356414172367040013524 0ustar liggesusers # For foundational changes to this code see fsum.R fnobs <- function(x, ...) UseMethod("fnobs") # , x fnobs.default <- function(x, g = NULL, TRA = NULL, use.g.names = TRUE, ...) { if(is.matrix(x) && !inherits(x, "matrix")) return(fnobs.matrix(x, g, TRA, use.g.names, ...)) if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(TRA)) { if(is.null(g)) return(.Call(C_fnobs,x,0L,0L)) if(is.atomic(g)) { if(use.g.names) { if(!is.nmfactor(g)) g <- qF(g, na.exclude = FALSE) lev <- attr(g, "levels") return(`names<-`(.Call(C_fnobs,x,length(lev),g), lev)) } if(is.nmfactor(g)) return(.Call(C_fnobs,x,fnlevels(g),g)) g <- qG(g, na.exclude = FALSE) return(.Call(C_fnobs,x,attr(g,"N.groups"),g)) } if(!is_GRP(g)) g <- GRP.default(g, return.groups = use.g.names, call = FALSE) if(use.g.names) return(`names<-`(.Call(C_fnobs,x,g[[1L]],g[[2L]]), GRPnames(g))) return(.Call(C_fnobs,x,g[[1L]],g[[2L]])) } if(is.null(g)) return(.Call(Cpp_TRA,x,.Call(C_fnobs,x,0L,0L),0L,TtI(TRA))) g <- G_guo(g) .Call(Cpp_TRA,x,.Call(C_fnobs,x,g[[1L]],g[[2L]]),g[[2L]],TtI(TRA)) } fnobs.matrix <- function(x, g = NULL, TRA = NULL, use.g.names = TRUE, drop = TRUE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(TRA)) { if(is.null(g)) return(.Call(C_fnobsm,x,0L,0L,drop)) if(is.atomic(g)) { if(use.g.names) { if(!is.nmfactor(g)) g <- qF(g, na.exclude = FALSE) lev <- attr(g, "levels") return(`dimnames<-`(.Call(C_fnobsm,x,length(lev),g,FALSE), list(lev, dimnames(x)[[2L]]))) } if(is.nmfactor(g)) return(.Call(C_fnobsm,x,fnlevels(g),g,FALSE)) g <- qG(g, na.exclude = FALSE) return(.Call(C_fnobsm,x,attr(g,"N.groups"),g,FALSE)) } if(!is_GRP(g)) g <- GRP.default(g, return.groups = use.g.names, call = FALSE) if(use.g.names) return(`dimnames<-`(.Call(C_fnobsm,x,g[[1L]],g[[2L]],FALSE), list(GRPnames(g), dimnames(x)[[2L]]))) return(.Call(C_fnobsm,x,g[[1L]],g[[2L]],FALSE)) } if(is.null(g)) return(.Call(Cpp_TRAm,x,.Call(C_fnobsm,x,0L,0L,TRUE),0L,TtI(TRA))) g <- G_guo(g) .Call(Cpp_TRAm,x,.Call(C_fnobsm,x,g[[1L]],g[[2L]],FALSE),g[[2L]],TtI(TRA)) } fnobs.data.frame <- function(x, g = NULL, TRA = NULL, use.g.names = TRUE, drop = TRUE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(TRA)) { if(is.null(g)) return(.Call(C_fnobsl,x,0L,0L,drop)) if(is.atomic(g)) { if(use.g.names && !inherits(x, "data.table")) { if(!is.nmfactor(g)) g <- qF(g, na.exclude = FALSE) lev <- attr(g, "levels") return(setRnDF(.Call(C_fnobsl,x,length(lev),g,FALSE), lev)) } if(is.nmfactor(g)) return(.Call(C_fnobsl,x,fnlevels(g),g,FALSE)) g <- qG(g, na.exclude = FALSE) return(.Call(C_fnobsl,x,attr(g,"N.groups"),g,FALSE)) } if(!is_GRP(g)) g <- GRP.default(g, return.groups = use.g.names, call = FALSE) if(use.g.names && !inherits(x, "data.table") && length(groups <- GRPnames(g))) return(setRnDF(.Call(C_fnobsl,x,g[[1L]],g[[2L]],FALSE), groups)) return(.Call(C_fnobsl,x,g[[1L]],g[[2L]],FALSE)) } if(is.null(g)) return(.Call(Cpp_TRAl,x,.Call(C_fnobsl,x,0L,0L,TRUE),0L,TtI(TRA))) g <- G_guo(g) .Call(Cpp_TRAl,x,.Call(C_fnobsl,x,g[[1L]],g[[2L]],FALSE),g[[2L]],TtI(TRA)) } fnobs.list <- function(x, g = NULL, TRA = NULL, use.g.names = TRUE, drop = TRUE, ...) fnobs.data.frame(x, g, TRA, use.g.names, drop, ...) fnobs.grouped_df <- function(x, TRA = NULL, use.g.names = FALSE, keep.group_vars = TRUE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) g <- GRP.grouped_df(x, call = FALSE) nam <- attr(x, "names") gn <- which(nam %in% g[[5L]]) nTRAl <- is.null(TRA) gl <- length(gn) > 0L if(gl || nTRAl) { ax <- attributes(x) attributes(x) <- NULL if(nTRAl) { ax[["groups"]] <- NULL ax[["class"]] <- fsetdiff(ax[["class"]], c("GRP_df", "grouped_df")) ax[["row.names"]] <- if(use.g.names) GRPnames(g) else .set_row_names(g[[1L]]) if(gl) { if(keep.group_vars) { ax[["names"]] <- c(g[[5L]], nam[-gn]) return(setAttributes(c(g[[4L]],.Call(C_fnobsl,x[-gn],g[[1L]],g[[2L]],FALSE)), ax)) } ax[["names"]] <- nam[-gn] return(setAttributes(.Call(C_fnobsl,x[-gn],g[[1L]],g[[2L]],FALSE), ax)) } else if(keep.group_vars) { ax[["names"]] <- c(g[[5L]], nam) return(setAttributes(c(g[[4L]],.Call(C_fnobsl,x,g[[1L]],g[[2L]],FALSE)), ax)) } else return(setAttributes(.Call(C_fnobsl,x,g[[1L]],g[[2L]],FALSE), ax)) } else if(keep.group_vars) { ax[["names"]] <- c(nam[gn], nam[-gn]) return(setAttributes(c(x[gn],.Call(Cpp_TRAl,x[-gn],.Call(C_fnobsl,x[-gn],g[[1L]],g[[2L]],FALSE),g[[2L]],TtI(TRA))), ax)) } ax[["names"]] <- nam[-gn] return(setAttributes(.Call(Cpp_TRAl,x[-gn],.Call(C_fnobsl,x[-gn],g[[1L]],g[[2L]],FALSE),g[[2L]],TtI(TRA)), ax)) } else return(.Call(Cpp_TRAl,x,.Call(C_fnobsl,x,g[[1L]],g[[2L]],FALSE),g[[2L]],TtI(TRA))) } fNobs <- function(x, ...) { message("Note that 'fNobs' was renamed to 'fnobs'. The S3 generic will not be removed anytime soon, but please use updated function names in new code, see help('collapse-renamed')") UseMethod("fnobs") } fNobs.default <- function(x, ...) { .Deprecated(msg = "This method belongs to a renamed function and will be removed end of 2022, see help('collapse-renamed')") fnobs.default(x, ...) } fNobs.matrix <- function(x, ...) { .Deprecated(msg = "This method belongs to a renamed function and will be removed end of 2022, see help('collapse-renamed')") fnobs.matrix(x, ...) } fNobs.data.frame <- function(x, ...) { .Deprecated(msg = "This method belongs to a renamed function and will be removed end of 2022, see help('collapse-renamed')") fnobs.data.frame(x, ...) } collapse/R/fhdbetween_fhdwithin.R0000644000176200001440000013131214176642305016570 0ustar liggesusers # TODO: More tests for attribute handling + Optimize linear fitting... demean <- function(x, fl, weights, ..., means = FALSE) { if(length(fl) == 1L && is.null(attr(fl, "slope.flag"))) { clx <- oldClass(x) # Need to do this because could call fbetween.grouped_df of fbetween.pseries / pdata.frame if(means) return(`oldClass<-`(fbetween(unclass(x), fl[[1L]], weights, na.rm = FALSE), clx)) else return(`oldClass<-`(fwithin(unclass(x), fl[[1L]], weights, na.rm = FALSE), clx)) } msg <- "For higher-dimensional centering and projecting out interactions need to install.packages('%s'), then unload [detach('package:collapse', unload = TRUE)] and reload [library(collapse)]." res <- getenvFUN("fixest_demean", msg)(x, fl, attr(fl, "slope.vars"), attr(fl, "slope.flag"), weights = weights, ..., notes = FALSE, im_confident = TRUE) if(!means) return(duplAttributes(res, x)) # if(!is.matrix(x)) dim(res) <- NULL # also need for flmres... e.g. with weights... intercept is no longer always added, so res needs to be a matrix... # Need matrix dimensions... for subset in variable.wise... do.call(cbind, fl[!fc]) needs to be preserved... # return(if(means) x - drop(res) else drop(res)) if(is.atomic(res)) return(duplAttributes(x - res, x)) duplAttributes(mapply(`-`, unattrib(x), unattrib(res), SIMPLIFY = FALSE, USE.NAMES = FALSE), x) } myModFrame <- function(f, data) { t <- terms.formula(f) v <- attr(t, "variables") res <- eval(v, data, parent.frame()) # faster than res <- eval(substitute(with(data, e), list(e = v))) attributes(res) <- list(names = as.character(v[-1L]), row.names = .set_row_names(fnrow2(data)), class = "data.frame", terms = t) res } # Example: # mf <- myModFrame( ~ factor(cyl)*poly(carb, 2) + factor(cyl):factor(vs) + factor(cyl):factor(vs):wt + factor(cyl):mpg + factor(am) + factor(hp > 146):qsec + vs + carb:am, data = mtcars) # mf <- myModFrame( ~ factor(cyl)*poly(carb, 2) + factor(cyl):factor(vs) + factor(cyl):mpg + factor(am) + factor(hp > 146):qsec + vs + carb:am, data = mtcars) finteract <- function(x, facts, mf) { # x and facts are logical f <- which(x & facts) if(length(f) == 1L) mf[[f]] else if(length(f) == 2L) do.call(`:`, mf[f]) else as_factor_GRP(GRP.default(mf[f], call = FALSE)) } slinteract <- function(sl, facts, mf) { # sl and facts are logical sl <- which(sl & !facts) res <- if(length(sl) == 1L) mf[[sl]] else do.call(`*`, mf[sl]) if(is.matrix(res)) mctl(res) else list(res) } # This is probably the craziest piece of code in the whole package: # It takes a model.frame as input and computes from it the inputs for both fixest::demean() # and linear model fitting getfl <- function(mf) { facts <- .Call(C_vtypes, mf, 2L) # vapply(unattrib(mf), is.factor, TRUE) # Any factors if(any(facts)) { terms <- attributes(attr(mf, "terms")) clmf <- oldClass(mf) oldClass(mf) <- NULL # good ?? tl <- terms[["term.labels"]] factors <- terms[[2L]] fctterms <- colSums(factors[facts, , drop = FALSE]) > 0 fctinteract <- fctterms & colSums(factors) > 1 # best ?? # Any interactions involving factors if(any(fctinteract)) { single <- rowSums(factors[facts, , drop = FALSE] > 0L) == 1 # These are either single factors or factors only appearing inside an interaction... factors <- factors[, fctinteract, drop = FALSE] nointeract <- rowSums(factors[facts, , drop = FALSE]) == 0 # These are factors not appearing in interactions singlefct <- names(which(single & nointeract)) # better way ?? # tl[fctterms & !fctinteract] intterms <- mctl(factors > 0L, TRUE) # Need names here fctfct <- colSums(factors[!facts, , drop = FALSE]) == 0 # These are factor-factor interactions... fctdat <- NULL # best way to do this ?? or as before with pre-allocation ?? lsf <- length(singlefct) lff <- bsum(fctfct) if(lsf) fctdat <- mf[singlefct] # unattrib() -> wrap around at the end... Nah, better with names... if(lff) fctdat <- c(fctdat, lapply(intterms[fctfct], finteract, TRUE, mf)) # Any heterogenous slopes if(lff != length(intterms)) { intslope <- intterms[!fctfct] slflag <- integer(lsf) factors <- factors[facts, !fctfct, drop = FALSE] dimnames(factors) <- NULL # Could have imp:exp and imp:exp:year, so we need to partial match imp:exp in all slope terms... imc <- im <- pmatch(names(which(fctfct)), names(intslope), nomatch = 0L) # need names to match here !! if(any(im)) { # first the fact:fact in order (only add slopes), then the other ones if(!all(im)) im <- im[im > 0L] # Check for duplicate factors in interactions (largely independent of the other stuff) dupchk <- factors[, -im, drop = FALSE] > 0L # same as intslopes... if(any(dupfct <- rowSums(dupchk) > 1)) { # Check for factors with multiple slopes... if(bsum(dupfct) > 1L) stop("Cannot currently support multiple factors with multiple slopes...") dupfct <- which(dupchk[dupfct, ]) # This accounts for im fctdat <- c(fctdat, lapply(c(intslope[-im][dupfct[1L]], intslope[-im][-dupfct]), finteract, facts, mf)) } else fctdat <- c(fctdat, lapply(intslope[-im], finteract, facts, mf)) # only get factors not already in fctfct... slopes <- lapply(c(intslope[im], intslope[-im]), slinteract, facts, mf) lsl <- lengths(slopes, FALSE) # No names here lim <- seq_along(im) imc[imc > 0L] <- lsl[lim] # This is ok, these are also included elsewhere slflag <- c(slflag, imc) if(length(lsl) != length(lim)) { # The other cases... if exist othmc <- lsl[-lim] if(any(alone <- single & !nointeract)) { alone <- colSums(factors[alone, -im, drop = FALSE]) > 0 # This finds the terms corresponding to a factor appearing in an interaction but nowhere else.. othmc[alone] <- -othmc[alone] } if(any(dupfct)) { # reordering if dupfct... putting it in front.. slopes[-lim] <- c(slopes[-lim][dupfct], slopes[-lim][-dupfct]) othmc <- c(bsum(othmc[dupfct]), othmc[-dupfct]) } slflag <- c(slflag, othmc) } # this shows single factors not interacted... set slflag to negative... # what about double interactions only with slope ??? i.e. only imp:exp:year -> also negative flag... } else { # No double factor interactions with slopes.. Only simple slopes interactions.. (what about dupfact of two different double interactions with slope, but no factfact?) dupchk <- factors > 0L # same as intslopes... if(any(dupfct <- rowSums(dupchk) > 1)) { # Check for factors with multiple slopes... if(bsum(dupfct) > 1L) stop("Cannot currently support multiple factors with multiple slopes...") dupfct <- which(dupchk[dupfct, ]) fctdat <- c(fctdat, lapply(c(intslope[dupfct[1L]], intslope[-dupfct]), finteract, facts, mf)) } else fctdat <- c(fctdat, lapply(intslope, finteract, facts, mf)) slopes <- lapply(intslope, slinteract, facts, mf) # getting slopes, independent of dupfct... lsl <- lengths(slopes, FALSE) if(any(alone <- single & !nointeract)) { # Any factor occurring only inside an interaction... This is independent of dupfact and thre associated reordering... alone <- colSums(factors[alone, , drop = FALSE]) > 0 lsl[alone] <- -lsl[alone] } if(any(dupfct)) { # reordering if dupfct... putting it in front.. slopes <- c(slopes[dupfct], slopes[-dupfct]) lsl <- c(bsum(lsl[dupfct]), lsl[-dupfct]) } slflag <- c(slflag, integer(lff), lsl) } attr(fctdat, "slope.vars") <- unlist(slopes, recursive = FALSE) # , FALSE, FALSE) attr(fctdat, "slope.flag") <- slflag # c(integer(length(fctdat)-length(intslope)), lengths(slopes)) # what about other slopes (not poly??) } # drop unused factor levels ?? } else fctdat <- mf[facts] modelterms <- tl[!fctterms] slflag <- attr(fctdat, "slope.flag") if(length(modelterms)) { # Intercept only needed if facts with only negative slope flag... form <- paste0(if(is.null(slflag) || any(slflag > 0L)) "~ -1 + " else "~ ", paste(modelterms, collapse = " + ")) moddat <- model.matrix.default(as.formula(form), data = `oldClass<-`(mf, clmf)) } else { moddat <- if(is.null(slflag) || any(slflag > 0L)) NULL else alloc(1, length(mf[[1L]])) } } else { fctdat <- NULL moddat <- model.matrix.default(attr(mf, "terms"), data = mf) # .External2(stats:::C_modelmatrix, attr(mf, "terms"), mf) } list(fl = fctdat, xmat = moddat) } # Keeps attributes ? -> Yes ! # fastest way ? or better use vectors ? -> this is faster than lapply(fl, `[`, cc) ! subsetfl <- function(fl, cc) { slopes <- attr(fl, "slope.vars") # fl could be a data.frame, slope vars not (getfl() unclasses) if(is.null(names(fl))) names(fl) <- seq_along(unclass(fl)) if(is.null(slopes)) return(.Call(C_subsetDT, fl, cc, seq_along(unclass(fl)), FALSE)) attr(fl, "slope.vars") <- NULL if(is.null(names(slopes))) names(slopes) <- seq_along(slopes) res <- .Call(C_subsetDT, fl, cc, seq_along(fl), FALSE) attr(res, "slope.vars") <- .Call(C_subsetDT, slopes, cc, seq_along(slopes), FALSE) # fdroplevels ?? res } # Old version: # subsetfl <- function(fl, cc) { # lapply(fl, function(f) { # use CsubsetDT or CsubsetVector ?? also check NA in regressors ?? # x <- attr(f, "x") # if(is.null(x)) return(.Call(C_subsetVector, f, cc, FALSE)) else # return(`attr<-`(.Call(C_subsetVector, f, cc, FALSE), "x", # if(is.matrix(x)) x[cc, , drop = FALSE] else # .Call(C_subsetVector, x, cc, FALSE))) # }) # } # Examples: # str(getfl(myModFrame( ~ cyl + carb, data = mtcars))) # str(getfl(myModFrame( ~ factor(cyl)*carb, data = mtcars))) # str(getfl(myModFrame( ~ factor(cyl) + factor(am), data = mtcars))) # str(getfl(myModFrame( ~ factor(cyl):factor(am), data = mtcars))) # str(getfl(myModFrame( ~ mpg + factor(cyl)*carb, data = mtcars))) # str(getfl(myModFrame( ~ mpg + factor(cyl) + factor(am), data = mtcars))) # str(getfl(myModFrame( ~ mpg + factor(cyl):factor(am), data = mtcars))) # str(getfl(myModFrame( ~ mpg + factor(cyl):factor(am):vs, data = mtcars))) # wow !! # str(getfl(myModFrame( ~ mpg + factor(cyl):factor(am)*vs, data = mtcars))) # wow !! # str(getfl(myModFrame( ~ mpg + factor(cyl):factor(am) + factor(cyl):factor(am):vs, data = mtcars))) # wow !! # str(getfl(myModFrame( ~ mpg + factor(cyl):mpg + factor(am):mpg + factor(cyl):factor(am), data = mtcars))) # str(getfl(model.frame( ~ factor(cyl)*carb + vs + wt:gear + wt:gear:carb, data = mtcars))) # (Weighted) linear model fitting for vectors and lists... # Neded to sort out some insufficiencies of base R default functions when dealing with dimensions `%**%` <- function(x, y) if(length(y) > 1L) x %*% y else x * y tcrossprod2 <- function(x, y) if(length(x) > 1L) tcrossprod(x, y) else `dim<-`(x * y, c(1L, length(y))) # y = x; X = xmat; w = w; meth = lm.method flmres <- function(y, X, w = NULL, meth = "qr", resi = TRUE, ...) { # n <- dim(X)[1L] # if(n != NROW(y)) stop("NROW(y) must match nrow(X)") dimnames(X) <- NULL # faster ?? if(length(w)) { # if(length(w) != n) stop("w must be numeric and length(w) == nrow(X)") wts <- sqrt(w) if(is.atomic(y)) { dimnames(y) <- NULL return(drop(switch(meth, qr = { fit <- X %**% qr.coef(qr(X * wts, ...), y * wts) # same as lm... if(resi) y - fit else fit }, chol = { fit <- X * wts fit <- X %*% chol2inv(chol(crossprod(fit), ...)) %*% crossprod(fit, y * wts) if(resi) y - fit else fit }, stop("Only methods 'qr' and 'chol' are supported")))) } attributes(y) <- NULL return(switch(meth, qr = { calc <- qr(X * wts, ...) if(resi) lapply(y, function(z) drop(z - X %**% qr.coef(calc, z * wts))) else lapply(y, function(z) drop(X %**% qr.coef(calc, z * wts))) }, chol = { calc <- X * wts calc <- X %*% tcrossprod2(chol2inv(chol(crossprod(calc), ...)), calc) if(resi) lapply(y, function(z) drop(z - calc %*% (z * wts))) else lapply(y, function(z) drop(calc %*% (z * wts))) }, stop("Only methods 'qr' and 'chol' are supported"))) } if(is.atomic(y)) { dimnames(y) <- NULL return(drop(switch(meth, qr = if(resi) qr.resid(qr(X, ...), y) else qr.fitted(qr(X, ...), y), chol = { fit <- X %*% chol2inv(chol(crossprod(X), ...)) %*% crossprod(X, y) if(resi) y - fit else fit }, stop("Only methods 'qr' and 'chol' are supported")))) } attributes(y) <- NULL return(switch(meth, qr = { calc <- qr(X, ...) if(resi) lapply(y, function(z) drop(qr.resid(calc, z))) else lapply(y, function(z) drop(qr.fitted(calc, z))) }, chol = { calc <- X %*% tcrossprod2(chol2inv(chol(crossprod(X), ...)), X) if(resi) lapply(y, function(z) drop(z - calc %*% z)) else lapply(y, function(z) drop(calc %*% z)) }, stop("Only methods 'qr' and 'chol' are supported"))) } fhdwithin <- function(x, ...) UseMethod("fhdwithin") # , x fhdwithin.default <- function(x, fl, w = NULL, na.rm = TRUE, fill = FALSE, lm.method = "qr", ...) { if(is.matrix(x) && !inherits(x, "matrix")) return(fhdwithin.matrix(x, fl, w, na.rm, fill, ...)) ax <- attributes(x) if(na.rm) { cc <- complete.cases(x, fl, w) # gives error if lengths don't match, otherwise demeanlist and qr.resid give errors !! if(!all(cc)) { ax[["na.rm"]] <- whichv(cc, FALSE) cc <- which(cc) w <- w[cc] # Note this here !! if(!fill) { if(length(names(x))) ax[["names"]] <- names(x)[cc] # best ?? x <- x[cc] } } else na.rm <- FALSE } if(is.list(fl)) { fc <- .Call(C_vtypes, fl, 2L) # vapply(unattrib(fl), is.factor, TRUE) fcl <- any(fc) # if(!fcl && !missing(...)) unused_arg_action(match.call(), ...) nallfc <- fcl && !all(fc) if(na.rm) fl <- subsetfl(fl, cc) attributes(fl) <- NULL # good here ?? if(nallfc) { xmat <- demean(do.call(cbind, fl[!fc]), fl[fc], w, ...) fl <- fl[fc] } else if(!fcl) xmat <- do.call(cbind, c(list(alloc(1L, length(fl[[1L]]))), fl)) } else if(is.matrix(fl)) { # if(!missing(...)) unused_arg_action(match.call(), ...) xmat <- if(na.rm) cbind(Intercept = 1L, fl[cc, , drop = FALSE]) else cbind(Intercept = 1L, fl) nallfc <- fcl <- FALSE } else if(is.factor(fl)) { fl <- if(na.rm) list(fl[cc]) else list(fl) fcl <- TRUE nallfc <- FALSE } else { if(!is.numeric(fl)) stop("fl must be a list of vectors / factors, a numeric matrix or a numeric vector") # if(!missing(...)) unused_arg_action(match.call(), ...) xmat <- if(na.rm) cbind(Intercept = 1L, fl[cc]) else cbind(Intercept = 1L, fl) nallfc <- fcl <- FALSE } if(nallfc || !fcl) { if(na.rm && fill) { x[-cc] <- NA x[cc] <- flmres(if(nallfc) demean(x[cc], fl, w, ...) else x[cc], xmat, w, lm.method, ...) return(setAttributes(x, ax)) } else return(setAttributes(flmres(if(nallfc) demean(x, fl, w, ...) else x, xmat, w, lm.method, ...), ax)) } else if(na.rm && fill) { x[-cc] <- NA x[cc] <- demean(x[cc], fl, w, ...) return(setAttributes(x, ax)) } else return(setAttributes(demean(x, fl, w, ...), ax)) } fhdwithin.pseries <- function(x, effect = "all", w = NULL, na.rm = TRUE, fill = TRUE, ...) { ix <- getpix(attr(x, "index")) namix <- attr(ix, "names") if(is.character(effect) && length(effect) == 1L && effect == "all") { effect <- seq_along(namix) } else effect <- cols2int(effect, ix, namix) g <- .subset(ix, effect) if(na.rm && length(cc <- whichv(x, NA, TRUE)) != length(x)) { g <- .Call(C_subsetDT, g, cc, seq_along(g), FALSE) # lapply(g, `[`, cc) -> slower ! if(fill) { x[cc] <- demean(.subset(`names<-`(x, NULL), cc), g, w[cc], ...) # keeps attributes ?? -> Yes !! return(x) } xcc <- .subset(x, cc) nix <- length(unclass(ix)) if(nix != length(g)) { toss <- seq_len(nix)[-effect] reix <- copyMostAttributes(c(.Call(C_subsetDT, ix, cc, toss, FALSE), g)[namix], ix) } else reix <- copyMostAttributes(g, ix) attr(reix, "row.names") <- .set_row_names(length(cc)) return(setAttributes(demean(xcc, g, w[cc], ...), c(attributes(xcc), list(index = reix, na.rm = seq_along(x)[-cc])))) } demean(x, g, w, ...) # keeps attributes ?? -> Yes !! } # x = mNA; fl = m; lm.method = "qr" fhdwithin.matrix <- function(x, fl, w = NULL, na.rm = TRUE, fill = FALSE, lm.method = "qr", ...) { ax <- attributes(x) if(na.rm) { cc <- complete.cases(x, fl, w) # gives error if lengths don't match, otherwise demeanlist and qr.resid give errors !! if(!all(cc)) { ax[["na.rm"]] <- whichv(cc, FALSE) cc <- which(cc) w <- w[cc] if(!fill) { if(length(dimnames(x)[[1L]])) ax[["dimnames"]][[1L]] <- dimnames(x)[[1L]][cc] # best ?? ax[["dim"]][1L] <- length(cc) x <- x[cc, , drop = FALSE] } } else na.rm <- FALSE } if(is.list(fl)) { fc <- .Call(C_vtypes, fl, 2L) # vapply(unattrib(fl), is.factor, TRUE) fcl <- any(fc) # if(!fcl && !missing(...)) unused_arg_action(match.call(), ...) nallfc <- fcl && !all(fc) if(na.rm) fl <- subsetfl(fl, cc) attributes(fl) <- NULL # good here ?? if(nallfc) { xmat <- demean(do.call(cbind, fl[!fc]), fl[fc], w, ...) fl <- fl[fc] } else if(!fcl) xmat <- do.call(cbind, c(list(alloc(1L, length(fl[[1L]]))), fl)) } else if(is.matrix(fl)) { # if(!missing(...)) unused_arg_action(match.call(), ...) xmat <- if(na.rm) cbind(Intercept = 1L, fl[cc, , drop = FALSE]) else cbind(Intercept = 1L, fl) nallfc <- fcl <- FALSE } else if(is.factor(fl)) { fl <- if(na.rm) list(fl[cc]) else list(fl) fcl <- TRUE nallfc <- FALSE } else { if(!is.numeric(fl)) stop("fl must be a list of vectors / factors, a numeric matrix or a numeric vector") # if(!missing(...)) unused_arg_action(match.call(), ...) xmat <- if(na.rm) cbind(Intercept = 1L, fl[cc]) else cbind(Intercept = 1L, fl) nallfc <- fcl <- FALSE } if(nallfc || !fcl) { if(na.rm && fill) { x[-cc, ] <- NA # What about weights cc ????? x[cc, ] <- flmres(if(nallfc) demean(x[cc, ], fl, w, ...) else x[cc, ], xmat, w, lm.method, ...) return(setAttributes(x, ax)) } else return(setAttributes(flmres(if(nallfc) demean(x, fl, w, ...) else x, xmat, w, lm.method, ...), ax)) } else if(na.rm && fill) { x[-cc, ] <- NA x[cc, ] <- demean(x[cc, ], fl, w, ...) return(setAttributes(x, ax)) } else return(setAttributes(demean(x, fl, w, ...), ax)) } # x = collapse:::colsubset(pwlddev, is.numeric) fhdwithin.pdata.frame <- function(x, effect = "all", w = NULL, na.rm = TRUE, fill = TRUE, variable.wise = TRUE, ...) { ix <- getpix(attr(x, "index")) namix <- attr(ix, "names") if(is.character(effect) && length(effect) == 1L && effect == "all") { effect <- seq_along(namix) } else effect <- cols2int(effect, ix, namix) g <- .subset(ix, effect) if(na.rm && fill && variable.wise) { ax <- attributes(x) attributes(x) <- NULL varwisecomp <- function(x, fl, w, ...) lapply(x, function(y) { ycc <- whichv(y, NA, TRUE) y[ycc] <- demean(.subset(y, ycc), subsetfl(fl, ycc), w[ycc], ...) return(y) }) return(setAttributes(varwisecomp(x, g, w, ...), ax)) } else if(na.rm && any(miss <- .Call(C_dt_na, x, seq_along(unclass(x))))) { cc <- whichv(miss, FALSE) gcc <- .Call(C_subsetDT, g, cc, seq_along(g), FALSE) Y <- demean(.Call(C_subsetDT, x, cc, seq_along(unclass(x)), FALSE), gcc, w[cc], ...) if(fill) { ax <- attributes(x) ax[["na.rm"]] <- which(miss) return(setAttributes(.Call(C_lassign, Y, fnrow2(x), cc, NA_real_), ax)) } attr(Y, "row.names") <- attr(x, "row.names")[cc] # row.names of pdata.frame are special. nix <- length(unclass(ix)) if(nix != length(g)) { toss <- seq_len(nix)[-effect] reix <- copyMostAttributes(c(.Call(C_subsetDT, ix, cc, toss, FALSE), gcc)[namix], ix) } else reix <- copyMostAttributes(gcc, ix) attr(reix, "row.names") <- .set_row_names(length(cc)) attr(Y, "index") <- reix attr(Y, "na.rm") <- which(miss) return(Y) } else return(demean(x, g, w, ...)) # setAttributes(, ax) -> Not needed anymore (included in demean()) } # x = data[5:6]; fl = data[-(5:6)]; variable.wise = TRUE fhdwithin.data.frame <- function(x, fl, w = NULL, na.rm = TRUE, fill = FALSE, variable.wise = FALSE, lm.method = "qr", ...) { ax <- attributes(x) if(na.rm) { cc <- if(variable.wise) complete.cases(fl, w) else complete.cases(x, fl, w) # gives error if lengths don't match, otherwise demeanlist and qr.resid give errors !! if(!all(cc)) { ax[["na.rm"]] <- whichv(cc, FALSE) cc <- which(cc) w <- w[cc] if(!variable.wise) { if(fill) nrx <- fnrow2(x) else ax[["row.names"]] <- ax[["row.names"]][cc] # best ?? x <- .Call(C_subsetDT, x, cc, seq_along(unclass(x)), FALSE) } } else na.rm <- FALSE } if(is.list(fl)) { # fl is a list !! fc <- .Call(C_vtypes, fl, 2L) # vapply(unattrib(fl), is.factor, TRUE) fcl <- any(fc) # if(!fcl && !missing(...)) unused_arg_action(match.call(), ...) nallfc <- fcl && !all(fc) if(na.rm) fl <- subsetfl(fl, cc) attributes(fl) <- NULL # good here ?? if(nallfc) { xmat <- demean(do.call(cbind, fl[!fc]), fl[fc], w, ...) fl <- fl[fc] } else if(!fcl) xmat <- do.call(cbind, c(list(alloc(1L, length(fl[[1L]]))), fl)) } else if(is.matrix(fl)) { # if(!missing(...)) unused_arg_action(match.call(), ...) xmat <- if(na.rm) cbind(Intercept = 1L, fl[cc, , drop = FALSE]) else cbind(Intercept = 1L, fl) nallfc <- fcl <- FALSE } else if(is.factor(fl)) { fl <- if(na.rm) list(fl[cc]) else list(fl) fcl <- TRUE nallfc <- FALSE } else { if(!is.numeric(fl)) stop("fl must be a list of vectors / factors, a numeric matrix or a numeric vector") # if(!missing(...)) unused_arg_action(match.call(), ...) xmat <- if(na.rm) cbind(Intercept = 1L, fl[cc]) else cbind(Intercept = 1L, fl) nallfc <- fcl <- FALSE } if(variable.wise) { if(na.rm) { # this means there were mising values in fl, which were already removed! return(setAttributes(lapply(unattrib(x), function(y) { y[-cc] <- NA # which is not faster !! ycc <- whichv(y, NA, TRUE) YC <- whichv(y[cc], NA, TRUE) wc <- w[YC] y[ycc] <- if(nallfc) flmres(demean(y[ycc], subsetfl(fl, YC), wc, ...), xmat[YC, , drop = FALSE], wc, lm.method, ...) else if(fcl) demean(y[ycc], subsetfl(fl, YC), wc, ...) else flmres(y[ycc], xmat[YC, , drop = FALSE], wc, lm.method, ...) return(y) }), ax)) } return(setAttributes(lapply(unattrib(x), function(y) { ycc <- whichv(y, NA, TRUE) wc <- w[ycc] y[ycc] <- if(nallfc) flmres(demean(y[ycc], subsetfl(fl, ycc), wc, ...), xmat[ycc, , drop = FALSE], wc, lm.method, ...) else if(fcl) demean(y[ycc], subsetfl(fl, ycc), wc, ...) else flmres(y[ycc], xmat[ycc, , drop = FALSE], wc, lm.method, ...) return(y) }), ax)) # Rfast fastlm?? } else { # at this point missing values are already removed from x and fl !! Y <- if(nallfc || !fcl) flmres(if(nallfc) demean(x, fl, w, ...) else x, xmat, w, lm.method, ...) else demean(x, fl, w, ...) if(na.rm && fill) # x[cc, ] <- Y; x[-cc, ] <- NA return(setAttributes(.Call(C_lassign, Y, nrx, cc, NA_real_), ax)) return(setAttributes(Y, ax)) } } # Note: could also do Mudlack and add means to second regression -> better than two-times centering ?? HDW <- function(x, ...) UseMethod("HDW") # , x HDW.default <- function(x, fl, w = NULL, na.rm = TRUE, fill = FALSE, lm.method = "qr", ...) { if(is.matrix(x) && !inherits(x, "matrix")) return(HDW.matrix(x, fl, w, na.rm, fill, lm.method, ...)) fhdwithin.default(x, fl, w, na.rm, fill, lm.method, ...) } HDW.pseries <- function(x, effect = "all", w = NULL, na.rm = TRUE, fill = TRUE, ...) fhdwithin.pseries(x, effect, w, na.rm, fill, ...) HDW.matrix <- function(x, fl, w = NULL, na.rm = TRUE, fill = FALSE, stub = "HDW.", lm.method = "qr", ...) add_stub(fhdwithin.matrix(x, fl, w, na.rm, fill, lm.method, ...), stub) # x = mtcars; fl = ~ qF(cyl):carb; w = wdat; stub = FALSE HDW.data.frame <- function(x, fl, w = NULL, cols = is.numeric, na.rm = TRUE, fill = FALSE, variable.wise = FALSE, stub = "HDW.", lm.method = "qr", ...) { if(is.call(fl)) { ax <- attributes(x) nam <- ax[["names"]] if(length(fl) == 3L) { fvars <- ckmatch(all.vars(fl[[3L]]), nam) Xvars <- ckmatch(all.vars(fl[[2L]]), nam) fl[[2L]] <- NULL } else { fvars <- ckmatch(all.vars(fl), nam) Xvars <- if(length(cols)) fsetdiff(cols2int(cols, x, nam), fvars) else seq_along(unclass(x))[-fvars] } ax[["names"]] <- if(is.character(stub)) paste0(stub, nam[Xvars]) else nam[Xvars] if(na.rm) { miss <- if(variable.wise) .Call(C_dt_na, x, fvars) else .Call(C_dt_na, x, c(Xvars, fvars)) if(missw <- length(w) && anyNA(w)) miss <- miss | is.na(w) if(missw || any(miss)) { ax[["na.rm"]] <- which(miss) cc <- whichv(miss, FALSE) w <- w[cc] if(!variable.wise) if(fill) nrx <- fnrow2(x) else ax[["row.names"]] <- ax[["row.names"]][cc] # best ?? } else na.rm <- FALSE } xmat <- NULL list2env(getfl(myModFrame(fl, if(na.rm) .Call(C_subsetDT, x, cc, fvars, FALSE) else .subset(x, fvars))), envir = environment()) fcl <- !is.null(fl) nallfc <- fcl && !is.null(xmat) if(nallfc) xmat <- demean(xmat, fl, w, ...) if(variable.wise) { if(na.rm) { return(setAttributes(lapply(.subset(x, Xvars), function(y) { y[-cc] <- NA ycc <- whichv(y, NA, TRUE) YC <- whichv(y[cc], NA, TRUE) wc <- w[YC] y[ycc] <- if(nallfc) flmres(demean(y[ycc], subsetfl(fl, YC), wc, ...), xmat[YC, , drop = FALSE], wc, lm.method, ...) else if(fcl) demean(y[ycc], subsetfl(fl, YC), wc, ...) else flmres(y[ycc], xmat[YC, , drop = FALSE], wc, lm.method, ...) return(y) }), ax)) } return(setAttributes(lapply(.subset(x, Xvars), function(y) { ycc <- whichv(y, NA, TRUE) wc <- w[ycc] y[ycc] <- if(nallfc) flmres(demean(y[ycc], subsetfl(fl, ycc), wc, ...), xmat[ycc, , drop = FALSE], wc, lm.method, ...) else if(fcl) demean(y[ycc], subsetfl(fl, ycc), wc, ...) else flmres(y[ycc], xmat[ycc, , drop = FALSE], wc, lm.method, ...) return(y) }), ax)) } else { # at this point missing values are already removed from fl !! Y <- if(na.rm) .Call(C_subsetDT, x, cc, Xvars, FALSE) else .subset(x, Xvars) Y <- if(nallfc || !fcl) flmres(if(nallfc) demean(Y, fl, w, ...) else Y, xmat, w, lm.method, ...) else demean(Y, fl, w, ...) if(na.rm && fill) # x[cc, ] <- Y; x[-cc, ] <- NA return(setAttributes(.Call(C_lassign, Y, nrx, cc, NA_real_), ax)) return(setAttributes(Y, ax)) } } # fl is not a formula !! add_stub(fhdwithin.data.frame(if(is.null(cols)) x else colsubset(x, cols), fl, w, na.rm, fill, variable.wise, lm.method, ...), stub) } HDW.pdata.frame <- function(x, effect = "all", w = NULL, cols = is.numeric, na.rm = TRUE, fill = TRUE, variable.wise = TRUE, stub = "HDW.", ...) add_stub(fhdwithin.pdata.frame(if(is.null(cols)) x else colsubset(x, cols), effect, w, na.rm, fill, variable.wise, ...), stub) # Theory: y = ?1 x1 + ?2 x2 + e # FWT: M2 y = ?1 M2 x1 + e so residuals: e = M2 y - ?1 M2 x1 and fitted: # Now M = I - x(x'x)-1x' = I - P. # So (I-P2) y = ?1 (I-P2) x1 + e or y - P2 y = ?1 x1 - ?1 P2 x1 + e # I want y - e = y^ = ?1 x1 + ?2 x2 # so # P2 y = ?1 P2 x1 + ?2 x2 # Haven't quite figgured it out, but my solution is to just subtract the demeaned data !! # Note: Only changes to fhdwithin is in the computation part: Perhaps you can combine the code in some better way to reduce code duplication ?? fhdbetween <- function(x, ...) UseMethod("fhdbetween") # , x fhdbetween.default <- function(x, fl, w = NULL, na.rm = TRUE, fill = FALSE, lm.method = "qr", ...) { if(is.matrix(x) && !inherits(x, "matrix")) return(fhdwithin.matrix(x, fl, w, na.rm, fill, lm.method, ...)) ax <- attributes(x) if(na.rm) { cc <- complete.cases(x, fl, w) # gives error if lengths don't match, otherwise demeanlist and qr.resid give errors !! if(!all(cc)) { ax[["na.rm"]] <- whichv(cc, FALSE) cc <- which(cc) w <- w[cc] # Note this here !! if(!fill) { if(length(names(x))) ax[["names"]] <- names(x)[cc] # best ?? x <- x[cc] } } else na.rm <- FALSE } if(is.list(fl)) { fc <- .Call(C_vtypes, fl, 2L) # vapply(unattrib(fl), is.factor, TRUE) fcl <- any(fc) # if(!fcl && !missing(...)) unused_arg_action(match.call(), ...) nallfc <- fcl && !all(fc) if(na.rm) fl <- subsetfl(fl, cc) attributes(fl) <- NULL # good here ?? if(nallfc) { xmat <- demean(do.call(cbind, fl[!fc]), fl[fc], w, ...) fl <- fl[fc] } else if(!fcl) xmat <- do.call(cbind, c(list(alloc(1L, length(fl[[1L]]))), fl)) } else if(is.matrix(fl)) { # if(!missing(...)) unused_arg_action(match.call(), ...) xmat <- if(na.rm) cbind(Intercept = 1L, fl[cc, , drop = FALSE]) else cbind(Intercept = 1L, fl) nallfc <- fcl <- FALSE } else if(is.factor(fl)) { fl <- if(na.rm) list(fl[cc]) else list(fl) fcl <- TRUE nallfc <- FALSE } else { if(!is.numeric(fl)) stop("fl must be a list of vectors / factors, a numeric matrix or a numeric vector") # if(!missing(...)) unused_arg_action(match.call(), ...) xmat <- if(na.rm) cbind(Intercept = 1L, fl[cc]) else cbind(Intercept = 1L, fl) nallfc <- fcl <- FALSE } # Only this part of the code is different from fhdwithin... if(nallfc || !fcl) { if(na.rm && fill) { x[-cc] <- NA xcc <- x[cc] x[cc] <- if(nallfc) xcc - flmres(demean(xcc, fl, w, ...), xmat, w, lm.method, ...) else flmres(xcc, xmat, w, lm.method, FALSE, ...) return(setAttributes(x, ax)) } else return(setAttributes(if(nallfc) x - flmres(demean(x, fl, w, ...), xmat, w, lm.method, ...) else flmres(x, xmat, w, lm.method, FALSE, ...), ax)) } else if(na.rm && fill) { x[-cc] <- NA x[cc] <- demean(x[cc], fl, w, ..., means = TRUE) return(setAttributes(x, ax)) } else return(setAttributes(demean(x, fl, w, ..., means = TRUE), ax)) } fhdbetween.pseries <- function(x, effect = "all", w = NULL, na.rm = TRUE, fill = TRUE, ...) fhdwithin.pseries(x, effect, w, na.rm, fill, ..., means = TRUE) fhdbetween.matrix <- function(x, fl, w = NULL, na.rm = TRUE, fill = FALSE, lm.method = "qr", ...) { ax <- attributes(x) if(na.rm) { cc <- complete.cases(x, fl, w) # gives error if lengths don't match, otherwise demeanlist and qr.resid give errors !! if(!all(cc)) { ax[["na.rm"]] <- whichv(cc, FALSE) cc <- which(cc) w <- w[cc] if(!fill) { if(length(dimnames(x)[[1L]])) ax[["dimnames"]][[1L]] <- dimnames(x)[[1L]][cc] # best ?? ax[["dim"]][1L] <- length(cc) x <- x[cc, , drop = FALSE] } } else na.rm <- FALSE } if(is.list(fl)) { fc <- .Call(C_vtypes, fl, 2L) # vapply(unattrib(fl), is.factor, TRUE) fcl <- any(fc) # if(!fcl && !missing(...)) unused_arg_action(match.call(), ...) nallfc <- fcl && !all(fc) if(na.rm) fl <- subsetfl(fl, cc) attributes(fl) <- NULL # good here ?? if(nallfc) { xmat <- demean(do.call(cbind, fl[!fc]), fl[fc], w, ...) fl <- fl[fc] } else if(!fcl) xmat <- do.call(cbind, c(list(alloc(1L, length(fl[[1L]]))), fl)) } else if(is.matrix(fl)) { # if(!missing(...)) unused_arg_action(match.call(), ...) xmat <- if(na.rm) cbind(Intercept = 1L, fl[cc, , drop = FALSE]) else cbind(Intercept = 1L, fl) nallfc <- fcl <- FALSE } else if(is.factor(fl)) { fl <- if(na.rm) list(fl[cc]) else list(fl) fcl <- TRUE nallfc <- FALSE } else { if(!is.numeric(fl)) stop("fl must be a list of vectors / factors, a numeric matrix or a numeric vector") # if(!missing(...)) unused_arg_action(match.call(), ...) xmat <- if(na.rm) cbind(Intercept = 1L, fl[cc]) else cbind(Intercept = 1L, fl) nallfc <- fcl <- FALSE } # Only this part of the code is different from fhdwithin... if(nallfc || !fcl) { if(na.rm && fill) { x[-cc, ] <- NA xcc <- x[cc, ] # What about weights cc ? -> done above... x[cc, ] <- if(nallfc) xcc - flmres(demean(xcc, fl, w, ...), xmat, w, lm.method, ...) else flmres(xcc, xmat, w, lm.method, FALSE, ...) return(setAttributes(x, ax)) } else return(setAttributes(if(nallfc) x - flmres(demean(x, fl, w, ...), xmat, w, lm.method, ...) else flmres(x, xmat, w, lm.method, FALSE, ...), ax)) } else if(na.rm && fill) { x[-cc, ] <- NA x[cc, ] <- demean(x[cc, ], fl, w, ..., means = TRUE) return(setAttributes(x, ax)) } else return(setAttributes(demean(x, fl, w, ..., means = TRUE), ax)) } fhdbetween.pdata.frame <- function(x, effect = "all", w = NULL, na.rm = TRUE, fill = TRUE, variable.wise = TRUE, ...) fhdwithin.pdata.frame(x, effect, w, na.rm, fill, variable.wise, ..., means = TRUE) fhdbetween.data.frame <- function(x, fl, w = NULL, na.rm = TRUE, fill = FALSE, variable.wise = FALSE, lm.method = "qr", ...) { ax <- attributes(x) if(na.rm) { cc <- if(variable.wise) complete.cases(fl, w) else complete.cases(x, fl, w) # gives error if lengths don't match, otherwise demeanlist and qr.resid give errors !! if(!all(cc)) { ax[["na.rm"]] <- whichv(cc, FALSE) cc <- which(cc) w <- w[cc] if(!variable.wise) { if(fill) nrx <- fnrow2(x) else ax[["row.names"]] <- ax[["row.names"]][cc] # best ?? x <- .Call(C_subsetDT, x, cc, seq_along(unclass(x)), FALSE) } } else na.rm <- FALSE } if(is.list(fl)) { # fl is a list !! fc <- .Call(C_vtypes, fl, 2L) # vapply(unattrib(fl), is.factor, TRUE) fcl <- any(fc) # if(!fcl && !missing(...)) unused_arg_action(match.call(), ...) nallfc <- fcl && !all(fc) if(na.rm) fl <- subsetfl(fl, cc) attributes(fl) <- NULL # good here ?? if(nallfc) { xmat <- demean(do.call(cbind, fl[!fc]), fl[fc], w, ...) fl <- fl[fc] } else if(!fcl) xmat <- do.call(cbind, c(list(alloc(1L, length(fl[[1L]]))), fl)) } else if(is.matrix(fl)) { # if(!missing(...)) unused_arg_action(match.call(), ...) xmat <- if(na.rm) cbind(Intercept = 1L, fl[cc, , drop = FALSE]) else cbind(Intercept = 1L, fl) nallfc <- fcl <- FALSE } else if(is.factor(fl)) { fl <- if(na.rm) list(fl[cc]) else list(fl) fcl <- TRUE nallfc <- FALSE } else { if(!is.numeric(fl)) stop("fl must be a list of vectors / factors, a numeric matrix or a numeric vector") # if(!missing(...)) unused_arg_action(match.call(), ...) xmat <- if(na.rm) cbind(Intercept = 1L, fl[cc]) else cbind(Intercept = 1L, fl) nallfc <- fcl <- FALSE } # Only this part of the code is different from fhdwithin !! if(variable.wise) { if(na.rm) { # this means there were mising values in fl, which were already removed! return(setAttributes(lapply(unattrib(x), function(y) { y[-cc] <- NA # which is not faster !! ycc <- whichv(y, NA, TRUE) YC <- whichv(y[cc], NA, TRUE) wc <- w[YC] yycc <- y[ycc] y[ycc] <- if(nallfc) yycc - flmres(demean(yycc, subsetfl(fl, YC), wc, ...), xmat[YC, , drop = FALSE], wc, lm.method, ...) else if(fcl) demean(yycc, subsetfl(fl, YC), wc, ..., means = TRUE) else flmres(yycc, xmat[YC, , drop = FALSE], wc, lm.method, FALSE, ...) return(y) }), ax)) } return(setAttributes(lapply(unattrib(x), function(y) { ycc <- whichv(y, NA, TRUE) wc <- w[ycc] yycc <- y[ycc] y[ycc] <- if(nallfc) yycc - flmres(demean(yycc, subsetfl(fl, ycc), wc, ...), xmat[ycc, , drop = FALSE], wc, lm.method, ...) else if(fcl) demean(yycc, subsetfl(fl, ycc), wc, ..., means = TRUE) else flmres(yycc, xmat[ycc, , drop = FALSE], wc, lm.method, FALSE, ...) return(y) }), ax)) # Rfast fastlm?? } else { # at this point missing values are already removed from x and fl !! if(nallfc || !fcl) { Y <- if(nallfc) x - flmres(demean(x, fl, w, ...), xmat, w, lm.method, ...) else flmres(x, xmat, w, lm.method, FALSE, ...) } else Y <- demean(x, fl, w, ..., means = TRUE) if(na.rm && fill) # x[cc, ] <- Y; x[-cc, ] <- NA return(setAttributes(.Call(C_lassign, Y, nrx, cc, NA_real_), ax)) return(setAttributes(Y, ax)) } } HDB <- function(x, ...) UseMethod("HDB") # , x HDB.default <- function(x, fl, w = NULL, na.rm = TRUE, fill = FALSE, lm.method = "qr", ...) { if(is.matrix(x) && !inherits(x, "matrix")) return(HDB.matrix(x, fl, w, na.rm, fill, lm.method, ...)) fhdbetween.default(x, fl, w, na.rm, fill, lm.method, ...) } HDB.pseries <- function(x, effect = "all", w = NULL, na.rm = TRUE, fill = TRUE, ...) fhdwithin.pseries(x, effect, w, na.rm, fill, ..., means = TRUE) HDB.matrix <- function(x, fl, w = NULL, na.rm = TRUE, fill = FALSE, stub = "HDB.", lm.method = "qr", ...) add_stub(fhdbetween.matrix(x, fl, w, na.rm, fill, lm.method, ...), stub) HDB.data.frame <- function(x, fl, w = NULL, cols = is.numeric, na.rm = TRUE, fill = FALSE, variable.wise = FALSE, stub = "HDB.", lm.method = "qr", ...) { if(is.call(fl)) { ax <- attributes(x) nam <- ax[["names"]] if(length(fl) == 3L) { fvars <- ckmatch(all.vars(fl[[3L]]), nam) Xvars <- ckmatch(all.vars(fl[[2L]]), nam) fl[[2L]] <- NULL } else { fvars <- ckmatch(all.vars(fl), nam) Xvars <- if(length(cols)) fsetdiff(cols2int(cols, x, nam), fvars) else seq_along(unclass(x))[-fvars] } ax[["names"]] <- if(is.character(stub)) paste0(stub, nam[Xvars]) else nam[Xvars] if(na.rm) { miss <- if(variable.wise) .Call(C_dt_na, x, fvars) else .Call(C_dt_na, x, c(Xvars, fvars)) if(missw <- length(w) && anyNA(w)) miss <- miss | is.na(w) if(missw || any(miss)) { ax[["na.rm"]] <- which(miss) cc <- whichv(miss, FALSE) w <- w[cc] if(!variable.wise) if(fill) nrx <- fnrow2(x) else ax[["row.names"]] <- ax[["row.names"]][cc] # best ?? } else na.rm <- FALSE } xmat <- NULL list2env(getfl(myModFrame(fl, if(na.rm) .Call(C_subsetDT, x, cc, fvars, FALSE) else .subset(x, fvars))), envir = environment()) fcl <- !is.null(fl) nallfc <- fcl && !is.null(xmat) if(nallfc) xmat <- demean(xmat, fl, w, ...) # Only this part of the code is different from fhdwithin !! if(variable.wise) { if(na.rm) { # this means there were mising values in fl, which were already removed! return(setAttributes(lapply(unattrib(x), function(y) { y[-cc] <- NA # which is not faster !! ycc <- whichv(y, NA, TRUE) YC <- whichv(y[cc], NA, TRUE) wc <- w[YC] yycc <- y[ycc] y[ycc] <- if(nallfc) yycc - flmres(demean(yycc, subsetfl(fl, YC), wc, ...), xmat[YC, , drop = FALSE], wc, lm.method, ...) else if(fcl) demean(yycc, subsetfl(fl, YC), wc, ..., means = TRUE) else flmres(yycc, xmat[YC, , drop = FALSE], wc, lm.method, FALSE, ...) return(y) }), ax)) } return(setAttributes(lapply(unattrib(x), function(y) { ycc <- whichv(y, NA, TRUE) wc <- w[ycc] yycc <- y[ycc] y[ycc] <- if(nallfc) yycc - flmres(demean(yycc, subsetfl(fl, ycc), wc, ...), xmat[ycc, , drop = FALSE], wc, lm.method, ...) else if(fcl) demean(yycc, subsetfl(fl, ycc), wc, ..., means = TRUE) else flmres(yycc, xmat[ycc, , drop = FALSE], wc, lm.method, FALSE, ...) return(y) }), ax)) } else { # at this point missing values are already removed from fl !! x <- if(na.rm) .Call(C_subsetDT, x, cc, Xvars, FALSE) else .subset(x, Xvars) if(nallfc || !fcl) { Y <- if(nallfc) x - flmres(demean(x, fl, w, ...), xmat, w, lm.method, ...) else flmres(x, xmat, w, lm.method, FALSE, ...) } else Y <- demean(x, fl, w, ..., means = TRUE) if(na.rm && fill) # x[cc, ] <- Y; x[-cc, ] <- NA return(setAttributes(.Call(C_lassign, Y, nrx, cc, NA_real_), ax)) return(setAttributes(Y, ax)) } } # fl is not a formula !! add_stub(fhdbetween.data.frame(if(is.null(cols)) x else colsubset(x, cols), fl, w, na.rm, fill, variable.wise, lm.method, ...), stub) } HDB.pdata.frame <- function(x, effect = "all", w = NULL, cols = is.numeric, na.rm = TRUE, fill = TRUE, variable.wise = TRUE, stub = "HDB.", ...) add_stub(fhdwithin.pdata.frame(if(is.null(cols)) x else colsubset(x, cols), effect, w, na.rm, fill, variable.wise, ..., means = TRUE), stub) fHDbetween <- function(x, ...) { message("Note that 'fHDbetween' was renamed to 'fhdbetween'. The S3 generic will not be removed anytime soon, but please use updated function names in new code, see help('collapse-renamed')") UseMethod("fhdbetween") } fHDbetween.default <- function(x, ...) { .Deprecated(msg = "This method belongs to a renamed function and will be removed end of 2022, see help('collapse-renamed')") fhdbetween.default(x, ...) } fHDbetween.matrix <- function(x, ...) { .Deprecated(msg = "This method belongs to a renamed function and will be removed end of 2022, see help('collapse-renamed')") fhdbetween.matrix(x, ...) } fHDbetween.data.frame <- function(x, ...) { .Deprecated(msg = "This method belongs to a renamed function and will be removed end of 2022, see help('collapse-renamed')") fhdbetween.data.frame(x, ...) } fHDwithin <- function(x, ...) { message("Note that 'fHDwithin' was renamed to 'fhdwithin'. The S3 generic will not be removed anytime soon, but please use updated function names in new code, see help('collapse-renamed')") UseMethod("fhdwithin") } fHDwithin.default <- function(x, ...) { .Deprecated(msg = "This method belongs to a renamed function and will be removed end of 2022, see help('collapse-renamed')") fhdwithin.default(x, ...) } fHDwithin.matrix <- function(x, ...) { .Deprecated(msg = "This method belongs to a renamed function and will be removed end of 2022, see help('collapse-renamed')") fhdwithin.matrix(x, ...) } fHDwithin.data.frame <- function(x, ...) { .Deprecated(msg = "This method belongs to a renamed function and will be removed end of 2022, see help('collapse-renamed')") fhdwithin.data.frame(x, ...) } # # HDW(x = mtcars, fl = ~ factor(cyl)*carb) # # HDW(x = mtcars, fl = ~ factor(cyl):vs) # # lm(mpg ~ factor(cyl):factor(vs), data = mtcars) # # HDW(x = mtcars, fl = ~ factor(cyl)*carb + vs + wt:gear + wt:gear:carb) # # # Works!! although there is a further interaction with carb!! # lm(mpg ~ hp, data = HDW(mtcars, ~ factor(cyl)*carb + vs + wt:gear + wt:gear:carb)) # lm(mpg ~ hp + factor(cyl)*carb + vs + wt:gear + wt:gear:carb, data = mtcars) # # lm(mpg ~ hp, data = HDW(mtcars, ~ factor(cyl)*carb + vs + wt:gear)) # lm(mpg ~ hp + factor(cyl)*carb + vs + wt:gear, data = mtcars) # # lm(mpg ~ hp, data = HDW(mtcars, ~ cyl*carb + vs + wt:gear)) # lm(mpg ~ hp + cyl*carb + vs + wt:gear, data = mtcars) # # # lm(mpg ~ hp, data = HDW(mtcars, mpg + hp ~ cyl*carb + factor(cyl)*poly(drat,2))) # lm(mpg ~ hp + cyl*carb + factor(cyl)*poly(drat,2), data = mtcars) # collapse/R/fmean.R0000644000176200001440000002143014172367040013472 0ustar liggesusers # Note: for principal innovations of this code see fsum.R fmean <- function(x, ...) UseMethod("fmean") # , x fmean.default <- function(x, g = NULL, w = NULL, TRA = NULL, na.rm = TRUE, use.g.names = TRUE, ...) { if(is.matrix(x) && !inherits(x, "matrix")) return(fmean.matrix(x, g, w, TRA, na.rm, use.g.names, ...)) if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(TRA)) { if(is.null(g)) return(.Call(Cpp_fmean,x,0L,0L,NULL,w,na.rm)) if(is.atomic(g)) { if(use.g.names) { if(!is.nmfactor(g)) g <- qF(g, na.exclude = FALSE) lev <- attr(g, "levels") return(`names<-`(.Call(Cpp_fmean,x,length(lev),g,NULL,w,na.rm), lev)) } if(is.nmfactor(g)) return(.Call(Cpp_fmean,x,fnlevels(g),g,NULL,w,na.rm)) g <- qG(g, na.exclude = FALSE) return(.Call(Cpp_fmean,x,attr(g,"N.groups"),g,NULL,w,na.rm)) } if(!is_GRP(g)) g <- GRP.default(g, return.groups = use.g.names, call = FALSE) if(use.g.names) return(`names<-`(.Call(Cpp_fmean,x,g[[1L]],g[[2L]],g[[3L]],w,na.rm), GRPnames(g))) return(.Call(Cpp_fmean,x,g[[1L]],g[[2L]],g[[3L]],w,na.rm)) } if(is.null(g)) return(.Call(Cpp_TRA,x,.Call(Cpp_fmean,x,0L,0L,NULL,w,na.rm),0L,TtI(TRA))) g <- G_guo(g) .Call(Cpp_TRA,x,.Call(Cpp_fmean,x,g[[1L]],g[[2L]],g[[3L]],w,na.rm),g[[2L]],TtI(TRA)) } fmean.matrix <- function(x, g = NULL, w = NULL, TRA = NULL, na.rm = TRUE, use.g.names = TRUE, drop = TRUE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(TRA)) { if(is.null(g)) return(.Call(Cpp_fmeanm,x,0L,0L,NULL,w,na.rm,drop)) if(is.atomic(g)) { if(use.g.names) { if(!is.nmfactor(g)) g <- qF(g, na.exclude = FALSE) lev <- attr(g, "levels") return(`dimnames<-`(.Call(Cpp_fmeanm,x,length(lev),g,NULL,w,na.rm,drop), list(lev, dimnames(x)[[2L]]))) } if(is.nmfactor(g)) return(.Call(Cpp_fmeanm,x,fnlevels(g),g,NULL,w,na.rm,drop)) g <- qG(g, na.exclude = FALSE) return(.Call(Cpp_fmeanm,x,attr(g,"N.groups"),g,NULL,w,na.rm,drop)) } if(!is_GRP(g)) g <- GRP.default(g, return.groups = use.g.names, call = FALSE) if(use.g.names) return(`dimnames<-`(.Call(Cpp_fmeanm,x,g[[1L]],g[[2L]],g[[3L]],w,na.rm,drop), list(GRPnames(g), dimnames(x)[[2L]]))) return(.Call(Cpp_fmeanm,x,g[[1L]],g[[2L]],g[[3L]],w,na.rm,drop)) } if(is.null(g)) return(.Call(Cpp_TRAm,x,.Call(Cpp_fmeanm,x,0L,0L,NULL,w,na.rm,TRUE),0L,TtI(TRA))) g <- G_guo(g) .Call(Cpp_TRAm,x,.Call(Cpp_fmeanm,x,g[[1L]],g[[2L]],g[[3L]],w,na.rm,drop),g[[2L]],TtI(TRA)) } fmean.data.frame <- function(x, g = NULL, w = NULL, TRA = NULL, na.rm = TRUE, use.g.names = TRUE, drop = TRUE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(TRA)) { if(is.null(g)) return(.Call(Cpp_fmeanl,x,0L,0L,NULL,w,na.rm,drop)) if(is.atomic(g)) { if(use.g.names && !inherits(x, "data.table")) { if(!is.nmfactor(g)) g <- qF(g, na.exclude = FALSE) lev <- attr(g, "levels") return(setRnDF(.Call(Cpp_fmeanl,x,length(lev),g,NULL,w,na.rm,drop), lev)) } if(is.nmfactor(g)) return(.Call(Cpp_fmeanl,x,fnlevels(g),g,NULL,w,na.rm,drop)) g <- qG(g, na.exclude = FALSE) return(.Call(Cpp_fmeanl,x,attr(g,"N.groups"),g,NULL,w,na.rm,drop)) } if(!is_GRP(g)) g <- GRP.default(g, return.groups = use.g.names, call = FALSE) if(use.g.names && !inherits(x, "data.table") && length(groups <- GRPnames(g))) return(setRnDF(.Call(Cpp_fmeanl,x,g[[1L]],g[[2L]],g[[3L]],w,na.rm,drop), groups)) return(.Call(Cpp_fmeanl,x,g[[1L]],g[[2L]],g[[3L]],w,na.rm,drop)) } if(is.null(g)) return(.Call(Cpp_TRAl,x,.Call(Cpp_fmeanl,x,0L,0L,NULL,w,na.rm,TRUE),0L,TtI(TRA))) g <- G_guo(g) .Call(Cpp_TRAl,x,.Call(Cpp_fmeanl,x,g[[1L]],g[[2L]],g[[3L]],w,na.rm,drop),g[[2L]],TtI(TRA)) } fmean.list <- function(x, g = NULL, w = NULL, TRA = NULL, na.rm = TRUE, use.g.names = TRUE, drop = TRUE, ...) fmean.data.frame(x, g, w, TRA, na.rm, use.g.names, drop, ...) fmean.grouped_df <- function(x, w = NULL, TRA = NULL, na.rm = TRUE, use.g.names = FALSE, keep.group_vars = TRUE, keep.w = TRUE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) g <- GRP.grouped_df(x, call = FALSE) wsym <- l1orn(as.character(substitute(w)), NULL) nam <- attr(x, "names") gn2 <- gn <- which(nam %in% g[[5L]]) nTRAl <- is.null(TRA) sumw <- NULL if(length(wsym) && length(wn <- whichv(nam, wsym))) { w <- .subset2(x, wn) # faster using unclass? if(any(gn == wn)) stop("Weights coincide with grouping variables!") gn <- c(gn, wn) if(keep.w) { if(nTRAl) sumw <- `names<-`(list(fsumC(w,g[[1L]],g[[2L]],NULL,na.rm)), paste0("sum.", wsym)) else if(keep.group_vars) gn2 <- gn else sumw <- gn2 <- wn } } gl <- length(gn) > 0L # necessary here, not before ! if(gl || nTRAl) { ax <- attributes(x) attributes(x) <- NULL if(nTRAl) { ax[["groups"]] <- NULL ax[["class"]] <- fsetdiff(ax[["class"]], c("GRP_df", "grouped_df")) ax[["row.names"]] <- if(use.g.names) GRPnames(g) else .set_row_names(g[[1L]]) if(gl) { if(keep.group_vars) { ax[["names"]] <- c(g[[5L]], names(sumw), nam[-gn]) return(setAttributes(c(g[[4L]], sumw, .Call(Cpp_fmeanl,x[-gn],g[[1L]],g[[2L]],g[[3L]],w,na.rm,FALSE)), ax)) } ax[["names"]] <- c(names(sumw), nam[-gn]) return(setAttributes(c(sumw, .Call(Cpp_fmeanl,x[-gn],g[[1L]],g[[2L]],g[[3L]],w,na.rm,FALSE)), ax)) } else if(keep.group_vars) { ax[["names"]] <- c(g[[5L]], nam) return(setAttributes(c(g[[4L]], .Call(Cpp_fmeanl,x,g[[1L]],g[[2L]],g[[3L]],w,na.rm,FALSE)), ax)) } else return(setAttributes(.Call(Cpp_fmeanl,x,g[[1L]],g[[2L]],g[[3L]],w,na.rm,FALSE), ax)) } else if(keep.group_vars || (keep.w && length(sumw))) { ax[["names"]] <- c(nam[gn2], nam[-gn]) return(setAttributes(c(x[gn2],.Call(Cpp_TRAl,x[-gn],.Call(Cpp_fmeanl,x[-gn],g[[1L]],g[[2L]],g[[3L]],w,na.rm,FALSE),g[[2L]],TtI(TRA))), ax)) } ax[["names"]] <- nam[-gn] return(setAttributes(.Call(Cpp_TRAl,x[-gn],.Call(Cpp_fmeanl,x[-gn],g[[1L]],g[[2L]],g[[3L]],w,na.rm,FALSE),g[[2L]],TtI(TRA)), ax)) } else return(.Call(Cpp_TRAl,x,.Call(Cpp_fmeanl,x,g[[1L]],g[[2L]],g[[3L]],w,na.rm,FALSE),g[[2L]],TtI(TRA))) } # Previous Version: With deparse(substitute(w)) and only keeping grouping columns if found in x. # fmean.grouped_df <- function(x, w = NULL, TRA = NULL, na.rm = TRUE, use.g.names = FALSE, # keep.group_vars = TRUE, keep.w = TRUE, ...) { # if(!missing(...)) unused_arg_action(match.call(), ...) # g <- GRP.grouped_df(x, call = FALSE) # wsym <- deparse(substitute(w)) # nam <- attr(x, "names") # gn2 <- gn <- which(nam %in% g[[5L]]) # nTRAl <- is.null(TRA) # sumw <- NULL # # if(length(wsym) && length(wn <- whichv(nam, wsym))) { # w <- .subset2(x, wn) # faster using unclass?? # if(any(gn == wn)) stop("Weights coincide with grouping variables!") # onlyw <- !length(gn) # gn <- c(gn, wn) # if(keep.w) { # if(nTRAl) sumw <- `names<-`(list(fsumC(w,g[[1L]],g[[2L]],NULL,na.rm)), paste0("sum.", wsym)) else if(keep.group_vars) # gn2 <- gn else sumw <- gn2 <- wn # } # } else onlyw <- FALSE # # gl <- length(gn) > 0L # necessary here, not before !!! # # if(gl || nTRAl) { # ax <- attributes(x) # attributes(x) <- NULL # if(nTRAl) { # ax[["groups"]] <- NULL # ax[["class"]] <- fsetdiff(ax[["class"]], c("GRP_df", "grouped_df")) # ax[["row.names"]] <- if(use.g.names) GRPnames(g) else .set_row_names(g[[1L]]) # if(gl) { # if(keep.group_vars && !onlyw) { # ax[["names"]] <- c(g[[5L]], names(sumw), ax[["names"]][-gn]) # return(setAttributes(c(g[[4L]], sumw, .Call(Cpp_fmeanl,x[-gn],g[[1L]],g[[2L]],g[[3L]],w,na.rm,FALSE)), ax)) # } else { # ax[["names"]] <- c(names(sumw), ax[["names"]][-gn]) # return(setAttributes(c(sumw, .Call(Cpp_fmeanl,x[-gn],g[[1L]],g[[2L]],g[[3L]],w,na.rm,FALSE)), ax)) # } # } else return(setAttributes(.Call(Cpp_fmeanl,x,g[[1L]],g[[2L]],g[[3L]],w,na.rm,FALSE), ax)) # } else if(keep.group_vars || (keep.w && length(sumw))) { # ax[["names"]] <- c(ax[["names"]][gn2], ax[["names"]][-gn]) # return(setAttributes(c(x[gn2],.Call(Cpp_TRAl,x[-gn],.Call(Cpp_fmeanl,x[-gn],g[[1L]],g[[2L]],g[[3L]],w,na.rm,FALSE),g[[2L]],TtI(TRA))), ax)) # } else { # ax[["names"]] <- ax[["names"]][-gn] # return(setAttributes(.Call(Cpp_TRAl,x[-gn],.Call(Cpp_fmeanl,x[-gn],g[[1L]],g[[2L]],g[[3L]],w,na.rm,FALSE),g[[2L]],TtI(TRA)), ax)) # } # } else return(.Call(Cpp_TRAl,x,.Call(Cpp_fmeanl,x,g[[1L]],g[[2L]],g[[3L]],w,na.rm,FALSE),g[[2L]],TtI(TRA))) # } collapse/R/zzz.R0000644000176200001440000001516114170151275013244 0ustar liggesusers .onLoad <- function(libname, pkgname) { res <- .Call(C_collapse_init, "init.success") if(!is.character(res) || res != "init.success") stop("collapse not succesfully loaded!") # https://stackoverflow.com/questions/12598242/global-variables-in-packages-in-r # https://stackoverflow.com/questions/49056642/r-how-to-make-variable-available-to-namespace-at-loading-time?noredirect=1&lq=1 .collapse_env <- new.env() # This slows down th eloading of collapse too much. Therefore we load those when needed. # suppressMessages({ # # .collapse_env$fixest_demean <- # if(requireNamespace("fixest", quietly = TRUE)) # fixest::demean else NULL # get0("demean", envir = getNamespace("fixest")) else NULL # # .collapse_env$weights_wtd.cors <- # if(requireNamespace("weights", quietly = TRUE)) # weights::wtd.cors else NULL # get0("wtd.cors", envir = getNamespace("weights")) else NULL # # .collapse_env$RcppArmadillo_fastLm <- # if(requireNamespace("RcppArmadillo", quietly = TRUE)) # RcppArmadillo::fastLmPure else NULL # get0("fastLmPure", envir = getNamespace("RcppArmadillo")) else NULL # _RcppArmadillo_fastLm_impl # # .collapse_env$RcppEigen_fastLm <- # if(requireNamespace("RcppEigen", quietly = TRUE)) # RcppEigen::fastLmPure else NULL # get0("fastLmPure", envir = getNamespace("RcppEigen")) else NULL # RcppEigen_fastLm_Impl # # }) clpns <- parent.env(environment()) assign(".collapse_env", .collapse_env, envir = clpns) # Old solution: does not dynamically update, would have to re-install collapse after installing these packages # assign(".RcppArmadillo_fastLm", # if(requireNamespace("RcppArmadillo", quietly = TRUE)) # get0("_RcppArmadillo_fastLm_impl", envir = getNamespace("RcppArmadillo")) else NULL, envir = parent.env(environment())) if(length(mask <- getOption("collapse_mask")) && is.character(mask)) { # if(!is.character(mask)) stop("Option collapse_mask needs to be character typed") if(any(mask == "all")) mask <- c("helper", "manip", "fast-fun", if(length(mask) > 1L) mask[mask != "all"] else NULL) manipfun <- c("fsubset", "ftransform", "ftransform<-", "ftransformv", "fcompute", "fcomputev", "fselect", "fselect<-", "fgroup_by", "fgroup_vars", "fungroup", "fsummarise", "fmutate", "frename") helperfun <- c("fdroplevels", "finteraction", "fnlevels", "funique", "fnrow", "fncol") # , "fdim": Problem of infinite recursion... if(any(mask == "helper")) mask <- unique.default(c(helperfun, mask[mask != "helper"])) if(any(mask == "manip")) mask <- unique.default(c(manipfun, mask[mask != "manip"])) if(any(mask == "fast-fun")) { mask <- unique.default(c(.FAST_FUN, mask[mask != "fast-fun"])) fsfnonold <- .FAST_STAT_FUN_EXT[!startsWith(.FAST_STAT_FUN_EXT, "fN")] assign(".FAST_STAT_FUN_EXT", c(.FAST_STAT_FUN_EXT, substr(fsfnonold, 2L, 100L)), envir = clpns) assign(".FAST_STAT_FUN_POLD", c(.FAST_STAT_FUN_POLD, substr(.FAST_STAT_FUN, 2L, 100L)), envir = clpns) ffnops <- setdiff(.FAST_FUN_MOPS, .OPERATOR_FUN) assign(".FAST_FUN_MOPS", c(.FAST_FUN_MOPS, substr(ffnops, 2L, 100L)), envir = clpns) } else { if(any(mask == "fast-stat-fun")) { mask <- unique.default(c(.FAST_STAT_FUN, mask[mask != "fast-stat-fun"])) fsfnonold <- .FAST_STAT_FUN_EXT[!startsWith(.FAST_STAT_FUN_EXT, "fN")] assign(".FAST_STAT_FUN_EXT", c(.FAST_STAT_FUN_EXT, substr(fsfnonold, 2L, 100L)), envir = clpns) assign(".FAST_STAT_FUN_POLD", c(.FAST_STAT_FUN_POLD, substr(.FAST_STAT_FUN, 2L, 100L)), envir = clpns) assign(".FAST_FUN_MOPS", c(.FAST_FUN_MOPS, substr(.FAST_STAT_FUN, 2L, 100L)), envir = clpns) } if(any(mask == "fast-trfm-fun")) { ftf <- fsetdiff(.FAST_FUN, .FAST_STAT_FUN) mask <- unique.default(c(ftf, mask[mask != "fast-trfm-fun"])) assign(".FAST_FUN_MOPS", c(.FAST_FUN_MOPS, substr(fsetdiff(ftf, c("fhdbetween", "fhdwithin")), 2L, 100L)), envir = clpns) } } if(!all(m <- mask %in% names(clpns))) stop("Unknown collapse functions supplied to option 'collapse_mask': ", paste(mask[!m], collapse = ", ")) if(!all(m <- startsWith(mask, "f"))) stop("All functions to me masked must start with 'f'. You supplied: ", paste(mask[!m], collapse = ", ")) unmask <- substr(mask, 2L, 100L) for(i in seq_along(mask)) assign(unmask[i], clpns[[mask[i]]], envir = clpns) namespaceExport(clpns, unmask) } if(isTRUE(getOption("collapse_F_to_FALSE"))) { assign("F", FALSE, envir = clpns) } # Experimental collapse_remove option: doesn't work because namespace exports not defined yet. # if(length(crem <- getOption("collapse_remove")) && is.character(crem)) { # # clpns <- getNamespace("collapse") # exports <- getNamespaceInfo(clpns, "exports") # clpns[[".__NAMESPACE__."]][["exports"]] # .getNamespaceInfo(clpns, "exports") # stop("length:", length(exports)) # remove(list = crem, envir = exports) # # setNamespaceInfo(clpns, "exports", exports) # # detach("package:collapse") # # attachNamespace(clpns) # # clpns[[".__NAMESPACE__."]][["exports"]] <- exports # } options(collapse_unused_arg_action = "warning", # error, warning, message or none collapse_DT_alloccol = 100L) invisible(res) } .onAttach <- function(libname, pkgname) { packageStartupMessage(paste0("collapse ",packageVersion("collapse"),", see ?`collapse-package` or ?`collapse-documentation`")) # \nNote: stats::D -> D.expression, D.call, D.name } .onUnload <- function (libpath) { library.dynam.unload("collapse", libpath) } # Note: To create local dev version of package change package name in DESCRIPTION, NAMESPACE, this file (including C_collapse_init), # replace all instances of `_collapse_` in source files (except for _collapse_DT_alloccol`), and also rename `R_init_collapse` in ExportSymbols.cpp. # and in vignetter / Rd files replace library(collapse) release_questions <- function() { c( "Have you updated the version number in DESCRIPTION, NEWS.md, NEWS.Rd, cran.comments and .onAttach?", "Updated Readme?", "Spell check ?", "built vignettes properly with Sys.setenv(RUNBENCH = TRUE)?", "Have you updated all help files with code changes, even if it's only documenting arguments or links?", "updated collapse-package.Rd and collapse-documentation.Rd?", "All functions in global_macros.R?", "checked all depreciated functions and arguments?", "any changess to arguments or order of arguments in key functions (GRP etc.). Does everything work?" ) } collapse/R/descr.R0000644000176200001440000001305614167331346013515 0ustar liggesusers # Could make label attribute optional ! descr <- function(X, Ndistinct = TRUE, higher = TRUE, table = TRUE, Qprobs = c(0.01, 0.05, 0.25, 0.5, 0.75, 0.95, 0.99), cols = NULL, label.attr = 'label', ...) { nam <- l1orlst(as.character(substitute(X))) armat <- function(x, y) c(x[1L], Ndist = y, x[-1L]) natrm <- function(x) if(is.na(names(x)[length(x)])) x[-length(x)] else x # Remove NA from table ! dotsok <- if(!missing(...)) names(substitute(c(...))[-1L]) %!in% c('pid','g') else TRUE numstats <- if(Ndistinct && dotsok) function(x, ...) armat(qsu.default(x, higher = higher, ...), fndistinctCpp(x)) else function(x, ...) qsu.default(x, higher = higher, ...) descrnum <- if(is.numeric(Qprobs)) function(x, ...) list(Class = class(x), Label = attr(x, label.attr), Stats = numstats(x, ...), Quant = quantile(x, probs = Qprobs, na.rm = TRUE)) else function(x, ...) list(Class = class(x), Label = attr(x, label.attr), Stats = numstats(x, ...)) # Could make this more efficient ? descrcat <- function(x, tab = table) if(tab) list(Class = class(x), Label = attr(x, label.attr), Stats = if(Ndistinct) c(N = fnobsC(x), Ndist = fndistinctCpp(x)) else `names<-`(fnobsC(x), 'Nobs'), Table = natrm(fnobs.default(x, x))) else # table(x). fnobs is a lot Faster, but includes NA as level ! list(Class = class(x), Label = attr(x, label.attr), Stats = if(Ndistinct) c(N = fnobsC(x), Ndist = fndistinctCpp(x)) else `names<-`(fnobsC(x), 'Nobs')) descrdate <- function(x) list(Class = class(x), Label = attr(x, label.attr), Stats = c(if(Ndistinct) c(N = fnobsC(x), Ndist = fndistinctCpp(x)) else `names<-`(fnobsC(x), 'Nobs'), `names<-`(range(x, na.rm = TRUE), c("Min", "Max")))) if(is.list(X)) { is_sf <- inherits(X, "sf") class(X) <- NULL if(is_sf) X[[attr(X, "sf_column")]] <- NULL } else X <- unclass(qDF(X)) if(length(cols)) X <- X[cols2int(cols, X, names(X), FALSE)] res <- vector('list', length(X)) num <- .Call(C_vtypes, X, 1L) # vapply(unattrib(X), is.numeric, TRUE) res[num] <- lapply(X[num], descrnum, ...) if(!all(num)) { date <- vapply(unattrib(X), is_date, TRUE) if(any(date)) { res[date] <- lapply(X[date], descrdate) cat <- !(num | date) } else cat <- !num res[cat] <- lapply(X[cat], descrcat) } attributes(res) <- list(names = names(X), name = nam, N = length(X[[1L]]), arstat = !dotsok, table = table, class = "descr") res } print.descr <- function(x, n = 7, perc = TRUE, digits = 2, t.table = TRUE, summary = TRUE, ...) { w <- paste(rep("-", .Options$width), collapse = "") nam <- names(x) arstat <- attr(x, "arstat") cb <- function(...) if(t.table) cbind(...) else formatC(rbind(...), drop0trailing = TRUE) ct <- function(z) if(t.table) cbind(Freq = z) else z cat('Dataset: ', attr(x,"name"),', ',length(x), ' Variables, N = ', attr(x, "N"), "\n", sep = "") cat(w, "\n", sep = "") for(i in seq_along(x)) { xi <- x[[i]] namxi <- names(xi) cat(nam[i]," (",strclp(xi[[1L]]),"): ",xi[[2L]], "\n", sep = "") cat(namxi[3L], ": \n", sep = "") print.qsu(xi[[3L]], digits) if(length(xi) > 3L) { if(arstat) cat("\n") cat(namxi[4L], ": \n", sep = "") if(namxi[4L] == "Table") { t <- unclass(xi[[4L]]) if(length(t) <= 2*n) { if(perc) print.default(cb(Freq = t, Perc = round(t/bsum(t)*100, digits)), right = TRUE, print.gap = 2, quote = FALSE) else print.table(ct(t)) } else { lt <- length(t) t1 <- t[seq_len(n)] t2 <- t[seq(lt-n, lt)] if(perc) { st <- bsum(t) print.default(cb(Freq = t1, Perc = round(t1/st*100, digits)), right = TRUE, print.gap = 2, quote = FALSE) cat(" ---\n") print.default(cb(Freq = t2, Perc = round(t2/st*100, digits)), right = TRUE, print.gap = 2, quote = FALSE) } else { print.table(ct(t1)) cat(" ---\n") print.table(ct(t2)) } if(summary) { cat("\nSummary of Table: \n") print.summaryDefault(summary.default(t), digits) } } } else print.qsu(xi[[4L]], digits) } cat(w, "\n", sep = "") # More compressed -> better ! # cat("\n", w, "\n", sep = "") } invisible(x) } # Not pasteclass !! # Note: This does not work for array stats (using g or pid.. ) as.data.frame.descr <- function(x, ...) { if(attr(x, "arstat")) stop("Cannot handle arrays of statistics!") if(attr(x, "table")) { r <- lapply(x, function(z) c(list(Class = strclp(z[[1L]]), Label = null2NA(z[[2L]])), unlist(`names<-`(lapply(z[names(z) != "Table"][-(1:2)], as.vector, "list"), NULL), recursive = FALSE))) } else { r <- lapply(x, function(z) c(list(Class = strclp(z[[1L]]), Label = null2NA(z[[2L]])), unlist(`names<-`(lapply(z[-(1:2)], as.vector, "list"), NULL), recursive = FALSE))) } r <- .Call(C_rbindlist, r, TRUE, TRUE, "Variable") if(allNA(r[["Label"]])) r[["Label"]] <- NULL attr(r, "row.names") <- .set_row_names(length(r[[1L]])) class(r) <- "data.frame" r } collapse/R/rsplit.R0000644000176200001440000001014114163373260013717 0ustar liggesusers # fsplit <- function(x, f, drop, ...) if(drop && is.factor(f)) # split(x, .Call(Cpp_fdroplevels, f, !inherits(f, "na.included")), drop = FALSE, ...) else # split(x, qF(f), drop = FALSE, ...) t_list2 <- function(x) .Call(Cpp_mctl, do.call(rbind, x), TRUE, 0L) # This is for export t_list <- function(l) { lmat <- do.call(rbind, l) rn <- dimnames(lmat)[[1L]] .Call(C_copyMostAttrib, lapply(.Call(Cpp_mctl, lmat, TRUE, 0L), `names<-`, rn), l) } rsplit <- function(x, ...) UseMethod("rsplit") rsplit.default <- function(x, fl, drop = TRUE, flatten = FALSE, use.names = TRUE, ...) { # , check = TRUE if(is.atomic(fl) || flatten || is_GRP(fl)) return(gsplit(x, fl, use.names, drop = drop, ...)) attributes(fl) <- NULL # if(check) fl <- lapply(fl, qF) # necessary ? -> split.default is actually faster on non-factor variables ! rspl <- function(y, fly) { if(length(fly) == 1L) return(gsplit(y, fly[[1L]], use.names, drop = drop, ...)) mapply(rspl, y = gsplit(y, fly[[1L]], use.names, drop = drop, ...), fly = t_list2(lapply(fly[-1L], gsplit, fly[[1L]], use.names, drop = drop, ...)), SIMPLIFY = FALSE) # Possibility to avoid transpose ? C_subsetDT ?? } rspl(x, fl) } # From stackoverflow package: # rsplit <- function (x, by, drop = FALSE) # { # if (is.atomic(by)) # return(split(x, by, drop = drop)) # attributes(by) <- NULL # if (length(by) == 1L) # return(split(x, by[[1L]], drop = drop)) # mapply(rsplit, x = split(x, by[[1L]], drop = drop), by = t(lapply(by[-1L], split, by[[1L]], drop = drop)), drop = drop, # SIMPLIFY = FALSE) # } rsplit.data.frame <- function(x, by, drop = TRUE, flatten = FALSE, # check = TRUE, cols = NULL, keep.by = FALSE, simplify = TRUE, use.names = TRUE, ...) { if(is.call(by)) { nam <- attr(x, "names") if(length(by) == 3L) { byn <- ckmatch(all.vars(by[[3L]]), nam) cols <- ckmatch(all.vars(by[[2L]]), nam) } else { # keep.by always added: Same behavior as L or W !! byn <- ckmatch(all.vars(by), nam) if(!(is.null(cols) && keep.by)) cols <- if(is.null(cols)) -byn else cols2int(cols, x, nam, FALSE) } by <- .subset(x, byn) if(length(cols)) x <- fcolsubset(x, if(keep.by) c(byn, cols) else cols, TRUE) } else if(length(cols)) x <- fcolsubset(x, cols2int(cols, x, attr(x, "names"), FALSE), TRUE) if(simplify && length(unclass(x)) == 1L) return(rsplit.default(.subset2(x, 1L), by, drop, flatten, use.names, ...)) # , check # Note there is a data.table method: split.data.table, which can also do recursive splitting.. j <- seq_along(unclass(x)) rn <- attr(x, "row.names") if(is.numeric(rn) || is.null(rn) || rn[1L] == "1") { gsplit_DF <- function(x, f, ...) lapply(gsplit(NULL, f, use.names, drop = drop, ...), function(i) .Call(C_subsetDT, x, i, j, FALSE)) # .Call, .NAME = C_subsetDT, j, FALSE) -> doesn't work! } else { gsplit_DF <- function(x, f, ...) { rown <- attr(x, "row.names") # Need to do this, handing down from the function body doesn't work lapply(gsplit(NULL, f, use.names, drop = drop, ...), function(i) `attr<-`(.Call(C_subsetDT, x, i, j, FALSE), "row.names", rown[i])) } } if(is.atomic(by) || flatten || is_GRP(by)) return(gsplit_DF(x, by, ...)) attributes(by) <- NULL # if(check) by <- lapply(by, qF) # necessary ? rspl_DF <- function(y, fly) { if(length(fly) == 1L) return(gsplit_DF(y, fly[[1L]], ...)) mapply(rspl_DF, y = gsplit_DF(y, fly[[1L]], ...), fly = t_list2(lapply(fly[-1L], gsplit, fly[[1L]], use.names, drop = drop, ...)), SIMPLIFY = FALSE) # Possibility to avoid transpose ? } # use C_subsetDT here as well ??? what is faster ??? rspl_DF(x, by) } # Misc trial: # # fli <- lapply(fl[-1L], split, fl[[1L]], ...) # for(i in seq_len(length(fl)-1L)) { # r <- rapply(r, split, fl[[1L]], how = "list") # fli <- rapply(fli[-1L], split, fli[[2L]], ..., how = "list") # } # } # r collapse/R/fndistinct.R0000644000176200001440000001537614176642305014571 0ustar liggesusers # For foundational changes to this code see fsum.R # Note: matrix method needs memory equal to size of the object, while data.frame method does not need any memory ?! fndistinct <- function(x, ...) UseMethod("fndistinct") # , x fndistinct.default <- function(x, g = NULL, TRA = NULL, na.rm = TRUE, use.g.names = TRUE, ...) { if(is.matrix(x) && !inherits(x, "matrix")) return(fndistinct.matrix(x, g, TRA, na.rm, use.g.names, ...)) if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(TRA)) { if(is.null(g)) return(.Call(Cpp_fndistinct,x,0L,0L,NULL,na.rm)) if(is.atomic(g)) { if(use.g.names) { if(!is.nmfactor(g)) g <- qF(g, na.exclude = FALSE) lev <- attr(g, "levels") return(`names<-`(.Call(Cpp_fndistinct,x,length(lev),g,NULL,na.rm), lev)) } if(is.nmfactor(g)) return(.Call(Cpp_fndistinct,x,fnlevels(g),g,NULL,na.rm)) g <- qG(g, na.exclude = FALSE) return(.Call(Cpp_fndistinct,x,attr(g,"N.groups"),g,NULL,na.rm)) } if(!is_GRP(g)) g <- GRP.default(g, return.groups = use.g.names, call = FALSE) if(use.g.names) return(`names<-`(.Call(Cpp_fndistinct,x,g[[1L]],g[[2L]],g[[3L]],na.rm), GRPnames(g))) return(.Call(Cpp_fndistinct,x,g[[1L]],g[[2L]],g[[3L]],na.rm)) } if(is.null(g)) return(.Call(Cpp_TRA,x,.Call(Cpp_fndistinct,x,0L,0L,NULL,na.rm),0L,TtI(TRA))) g <- G_guo(g) .Call(Cpp_TRA,x,.Call(Cpp_fndistinct,x,g[[1L]],g[[2L]],g[[3L]],na.rm),g[[2L]],TtI(TRA)) } fndistinct.matrix <- function(x, g = NULL, TRA = NULL, na.rm = TRUE, use.g.names = TRUE, drop = TRUE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(TRA)) { if(is.null(g)) return(.Call(Cpp_fndistinctm,x,0L,0L,NULL,na.rm,drop)) if(is.atomic(g)) { if(use.g.names) { if(!is.nmfactor(g)) g <- qF(g, na.exclude = FALSE) lev <- attr(g, "levels") return(`dimnames<-`(.Call(Cpp_fndistinctm,x,length(lev),g,NULL,na.rm,FALSE), list(lev, dimnames(x)[[2L]]))) } if(is.nmfactor(g)) return(.Call(Cpp_fndistinctm,x,fnlevels(g),g,NULL,na.rm,FALSE)) g <- qG(g, na.exclude = FALSE) return(.Call(Cpp_fndistinctm,x,attr(g,"N.groups"),g,NULL,na.rm,FALSE)) } if(!is_GRP(g)) g <- GRP.default(g, return.groups = use.g.names, call = FALSE) if(use.g.names) return(`dimnames<-`(.Call(Cpp_fndistinctm,x,g[[1L]],g[[2L]],g[[3L]],na.rm,FALSE), list(GRPnames(g), dimnames(x)[[2L]]))) return(.Call(Cpp_fndistinctm,x,g[[1L]],g[[2L]],g[[3L]],na.rm,FALSE)) } if(is.null(g)) return(.Call(Cpp_TRAm,x,.Call(Cpp_fndistinctm,x,0L,0L,NULL,na.rm,TRUE),0L,TtI(TRA))) g <- G_guo(g) .Call(Cpp_TRAm,x,.Call(Cpp_fndistinctm,x,g[[1L]],g[[2L]],g[[3L]],na.rm,FALSE),g[[2L]],TtI(TRA)) } fndistinct.data.frame <- function(x, g = NULL, TRA = NULL, na.rm = TRUE, use.g.names = TRUE, drop = TRUE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(TRA)) { if(is.null(g)) return(.Call(Cpp_fndistinctl,x,0L,0L,NULL,na.rm,drop)) if(is.atomic(g)) { if(use.g.names && !inherits(x, "data.table")) { if(!is.nmfactor(g)) g <- qF(g, na.exclude = FALSE) lev <- attr(g, "levels") return(setRnDF(.Call(Cpp_fndistinctl,x,length(lev),g,NULL,na.rm,FALSE), lev)) } if(is.nmfactor(g)) return(.Call(Cpp_fndistinctl,x,fnlevels(g),g,NULL,na.rm,FALSE)) g <- qG(g, na.exclude = FALSE) return(.Call(Cpp_fndistinctl,x,attr(g,"N.groups"),g,NULL,na.rm,FALSE)) } if(!is_GRP(g)) g <- GRP.default(g, return.groups = use.g.names, call = FALSE) if(use.g.names && !inherits(x, "data.table") && length(groups <- GRPnames(g))) return(setRnDF(.Call(Cpp_fndistinctl,x,g[[1L]],g[[2L]],g[[3L]],na.rm,FALSE), groups)) return(.Call(Cpp_fndistinctl,x,g[[1L]],g[[2L]],g[[3L]],na.rm,FALSE)) } if(is.null(g)) return(.Call(Cpp_TRAl,x,.Call(Cpp_fndistinctl,x,0L,0L,NULL,na.rm,TRUE),0L,TtI(TRA))) g <- G_guo(g) .Call(Cpp_TRAl,x,.Call(Cpp_fndistinctl,x,g[[1L]],g[[2L]],g[[3L]],na.rm,FALSE),g[[2L]],TtI(TRA)) } fndistinct.list <- function(x, g = NULL, TRA = NULL, na.rm = TRUE, use.g.names = TRUE, drop = TRUE, ...) fndistinct.data.frame(x, g, TRA, na.rm, use.g.names, drop, ...) fndistinct.grouped_df <- function(x, TRA = NULL, na.rm = TRUE, use.g.names = FALSE, keep.group_vars = TRUE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) g <- GRP.grouped_df(x, call = FALSE) nam <- attr(x, "names") gn <- which(nam %in% g[[5L]]) nTRAl <- is.null(TRA) gl <- length(gn) > 0L if(gl || nTRAl) { ax <- attributes(x) attributes(x) <- NULL if(nTRAl) { ax[["groups"]] <- NULL ax[["class"]] <- fsetdiff(ax[["class"]], c("GRP_df", "grouped_df")) ax[["row.names"]] <- if(use.g.names) GRPnames(g) else .set_row_names(g[[1L]]) if(gl) { if(keep.group_vars) { ax[["names"]] <- c(g[[5L]], nam[-gn]) return(setAttributes(c(g[[4L]],.Call(Cpp_fndistinctl,x[-gn],g[[1L]],g[[2L]],g[[3L]],na.rm,FALSE)), ax)) } ax[["names"]] <- nam[-gn] return(setAttributes(.Call(Cpp_fndistinctl,x[-gn],g[[1L]],g[[2L]],g[[3L]],na.rm,FALSE), ax)) } else if(keep.group_vars) { ax[["names"]] <- c(g[[5L]], nam) return(setAttributes(c(g[[4L]],.Call(Cpp_fndistinctl,x,g[[1L]],g[[2L]],g[[3L]],na.rm,FALSE)), ax)) } else return(setAttributes(.Call(Cpp_fndistinctl,x,g[[1L]],g[[2L]],g[[3L]],na.rm,FALSE), ax)) } else if(keep.group_vars) { ax[["names"]] <- c(nam[gn], nam[-gn]) return(setAttributes(c(x[gn],.Call(Cpp_TRAl,x[-gn],.Call(Cpp_fndistinctl,x[-gn],g[[1L]],g[[2L]],g[[3L]],na.rm,FALSE),g[[2L]],TtI(TRA))), ax)) } ax[["names"]] <- nam[-gn] return(setAttributes(.Call(Cpp_TRAl,x[-gn],.Call(Cpp_fndistinctl,x[-gn],g[[1L]],g[[2L]],g[[3L]],na.rm,FALSE),g[[2L]],TtI(TRA)), ax)) } else return(.Call(Cpp_TRAl,x,.Call(Cpp_fndistinctl,x,g[[1L]],g[[2L]],g[[3L]],na.rm,FALSE),g[[2L]],TtI(TRA))) } fNdistinct <- function(x, ...) { message("Note that 'fNdistinct' was renamed to 'fndistinct'. The S3 generic will not be removed anytime soon, but please use updated function names in new code, see help('collapse-renamed')") UseMethod("fndistinct") } fNdistinct.default <- function(x, ...) { .Deprecated(msg = "This method belongs to a renamed function and will be removed end of 2022, see help('collapse-renamed')") fndistinct.default(x, ...) } fNdistinct.matrix <- function(x, ...) { .Deprecated(msg = "This method belongs to a renamed function and will be removed end of 2022, see help('collapse-renamed')") fndistinct.matrix(x, ...) } fNdistinct.data.frame <- function(x, ...) { .Deprecated(msg = "This method belongs to a renamed function and will be removed end of 2022, see help('collapse-renamed')") fndistinct.data.frame(x, ...) } collapse/R/my_RcppExports.R0000644000176200001440000002155614172367040015413 0ustar liggesusers BWCpp <- function(x, ng = 0L, g = 0L, gs = NULL, w = NULL, narm = TRUE, theta = 1, set_mean = 0, B = FALSE, fill = FALSE) { .Call(Cpp_BW, x, ng, g, gs, w, narm, theta, set_mean, B, fill) } BWmCpp <- function(x, ng = 0L, g = 0L, gs = NULL, w = NULL, narm = TRUE, theta = 1, set_mean = 0, B = FALSE, fill = FALSE) { .Call(Cpp_BWm, x, ng, g, gs, w, narm, theta, set_mean, B, fill) } BWlCpp <- function(x, ng = 0L, g = 0L, gs = NULL, w = NULL, narm = TRUE, theta = 1, set_mean = 0, B = FALSE, fill = FALSE) { .Call(Cpp_BWl, x, ng, g, gs, w, narm, theta, set_mean, B, fill) } TRACpp <- function(x, xAG, g = 0L, ret = 1L) { .Call(Cpp_TRA, x, xAG, g, ret) } TRAmCpp <- function(x, xAG, g = 0L, ret = 1L) { .Call(Cpp_TRAm, x, xAG, g, ret) } TRAlCpp <- function(x, xAG, g = 0L, ret = 1L) { .Call(Cpp_TRAl, x, xAG, g, ret) } fndistinctCpp <- function(x, ng = 0L, g = 0L, gs = NULL, narm = TRUE) { .Call(Cpp_fndistinct, x, ng, g, gs, narm) } fndistinctlCpp <- function(x, ng = 0L, g = 0L, gs = NULL, narm = TRUE, drop = TRUE) { .Call(Cpp_fndistinctl, x, ng, g, gs, narm, drop) } fndistinctmCpp <- function(x, ng = 0L, g = 0L, gs = NULL, narm = TRUE, drop = TRUE) { .Call(Cpp_fndistinctm, x, ng, g, gs, narm, drop) } pwnobsmCpp <- function(x) { .Call(Cpp_pwnobsm, x) } fnobsC <- function(x, ng = 0L, g = 0L) { .Call(C_fnobs, x, ng, g) } # fnobsmC <- function(x, ng = 0L, g = 0L, drop = TRUE) { # .Call(C_fnobsm, x, ng, g, drop) # } # fnobslC <- function(x, ng = 0L, g = 0L, drop = TRUE) { # .Call(C_fnobsl, x, ng, g, drop) # } varyingCpp <- function(x, ng = 0L, g = 0L, any_group = TRUE) { .Call(Cpp_varying, x, ng, g, any_group) } varyingmCpp <- function(x, ng = 0L, g = 0L, any_group = TRUE, drop = TRUE) { .Call(Cpp_varyingm, x, ng, g, any_group, drop) } varyinglCpp <- function(x, ng = 0L, g = 0L, any_group = TRUE, drop = TRUE) { .Call(Cpp_varyingl, x, ng, g, any_group, drop) } fbstatsCpp <- function(x, ext = FALSE, ng = 0L, g = 0L, npg = 0L, pg = 0L, w = NULL, stable.algo = TRUE, array = TRUE, setn = TRUE, gn = NULL) { .Call(Cpp_fbstats, x, ext, ng, g, npg, pg, w, stable.algo, array, setn, gn) } fbstatsmCpp <- function(x, ext = FALSE, ng = 0L, g = 0L, npg = 0L, pg = 0L, w = NULL, stable.algo = TRUE, array = TRUE, gn = NULL) { .Call(Cpp_fbstatsm, x, ext, ng, g, npg, pg, w, stable.algo, array, gn) } fbstatslCpp <- function(x, ext = FALSE, ng = 0L, g = 0L, npg = 0L, pg = 0L, w = NULL, stable.algo = TRUE, array = TRUE, gn = NULL) { .Call(Cpp_fbstatsl, x, ext, ng, g, npg, pg, w, stable.algo, array, gn) } fdiffgrowthCpp <- function(x, n = 1L, diff = 1L, fill = NA_real_, ng = 0L, g = 0L, gs = NULL, t = NULL, ret = 1L, rho = 1, names = TRUE, power = 1) { .Call(Cpp_fdiffgrowth, x, n, diff, fill, ng, g, gs, t, ret, rho, names, power) } fdiffgrowthmCpp <- function(x, n = 1L, diff = 1L, fill = NA_real_, ng = 0L, g = 0L, gs = NULL, t = NULL, ret = 1L, rho = 1, names = TRUE, power = 1) { .Call(Cpp_fdiffgrowthm, x, n, diff, fill, ng, g, gs, t, ret, rho, names, power) } fdiffgrowthlCpp <- function(x, n = 1L, diff = 1L, fill = NA_real_, ng = 0L, g = 0L, gs = NULL, t = NULL, ret = 1L, rho = 1, names = TRUE, power = 1) { .Call(Cpp_fdiffgrowthl, x, n, diff, fill, ng, g, gs, t, ret, rho, names, power) } flagleadCpp <- function(x, n = 1L, fill = NULL, ng = 0L, g = 0L, t = NULL, names = TRUE) { .Call(Cpp_flaglead, x, n, fill, ng, g, t, names) } flagleadmCpp <- function(x, n = 1L, fill = NULL, ng = 0L, g = 0L, t = NULL, names = TRUE) { .Call(Cpp_flagleadm, x, n, fill, ng, g, t, names) } flagleadlCpp <- function(x, n = 1L, fill = NULL, ng = 0L, g = 0L, t = NULL, names = TRUE) { .Call(Cpp_flagleadl, x, n, fill, ng, g, t, names) } fmeanCpp <- function(x, ng = 0L, g = 0L, gs = NULL, w = NULL, narm = TRUE) { .Call(Cpp_fmean, x, ng, g, gs, w, narm) } fmeanmCpp <- function(x, ng = 0L, g = 0L, gs = NULL, w = NULL, narm = TRUE, drop = TRUE) { .Call(Cpp_fmeanm, x, ng, g, gs, w, narm, drop) } fmeanlCpp <- function(x, ng = 0L, g = 0L, gs = NULL, w = NULL, narm = TRUE, drop = TRUE) { .Call(Cpp_fmeanl, x, ng, g, gs, w, narm, drop) } fnthCpp <- function(x, n = 0.5, ng = 0L, g = 0L, gs = NULL, w = NULL, narm = TRUE, ret = 1L) { .Call(Cpp_fnth, x, n, ng, g, gs, w, narm, ret) } fnthmCpp <- function(x, n = 0.5, ng = 0L, g = 0L, gs = NULL, w = NULL, narm = TRUE, drop = TRUE, ret = 1L) { .Call(Cpp_fnthm, x, n, ng, g, gs, w, narm, drop, ret) } fnthlCpp <- function(x, n = 0.5, ng = 0L, g = 0L, gs = NULL, w = NULL, narm = TRUE, drop = TRUE, ret = 1L) { .Call(Cpp_fnthl, x, n, ng, g, gs, w, narm, drop, ret) } fmodeCpp <- function(x, ng = 0L, g = 0L, gs = NULL, w = NULL, narm = TRUE, ret = 0L) { .Call(Cpp_fmode, x, ng, g, gs, w, narm, ret) } fmodelCpp <- function(x, ng = 0L, g = 0L, gs = NULL, w = NULL, narm = TRUE, ret = 0L) { .Call(Cpp_fmodel, x, ng, g, gs, w, narm, ret) } fmodemCpp <- function(x, ng = 0L, g = 0L, gs = NULL, w = NULL, narm = TRUE, drop = TRUE, ret = 0L) { .Call(Cpp_fmodem, x, ng, g, gs, w, narm, drop, ret) } fprodCpp <- function(x, ng = 0L, g = 0L, w = NULL, narm = TRUE) { .Call(Cpp_fprod, x, ng, g, w, narm) } fprodmCpp <- function(x, ng = 0L, g = 0L, w = NULL, narm = TRUE, drop = TRUE) { .Call(Cpp_fprodm, x, ng, g, w, narm, drop) } fprodlCpp <- function(x, ng = 0L, g = 0L, w = NULL, narm = TRUE, drop = TRUE) { .Call(Cpp_fprodl, x, ng, g, w, narm, drop) } fscaleCpp <- function(x, ng = 0L, g = 0L, w = NULL, narm = TRUE, set_mean = 0, set_sd = 1) { .Call(Cpp_fscale, x, ng, g, w, narm, set_mean, set_sd) } fscalemCpp <- function(x, ng = 0L, g = 0L, w = NULL, narm = TRUE, set_mean = 0, set_sd = 1) { .Call(Cpp_fscalem, x, ng, g, w, narm, set_mean, set_sd) } fscalelCpp <- function(x, ng = 0L, g = 0L, w = NULL, narm = TRUE, set_mean = 0, set_sd = 1) { .Call(Cpp_fscalel, x, ng, g, w, narm, set_mean, set_sd) } fsumC <- function(x, ng = 0L, g = 0L, w = NULL, narm = TRUE) { .Call(C_fsum, x, ng, g, w, narm) } # fsummC <- function(x, ng = 0L, g = 0L, w = NULL, narm = TRUE, drop = TRUE) { # .Call(C_fsumm, x, ng, g, w, narm, drop) # } # fsumlC <- function(x, ng = 0L, g = 0L, w = NULL, narm = TRUE, drop = TRUE) { # .Call(C_fsuml, x, ng, g, w, narm, drop) # } fvarsdCpp <- function(x, ng = 0L, g = 0L, gs = NULL, w = NULL, narm = TRUE, stable_algo = TRUE, sd = TRUE) { .Call(Cpp_fvarsd, x, ng, g, gs, w, narm, stable_algo, sd) } fvarsdmCpp <- function(x, ng = 0L, g = 0L, gs = NULL, w = NULL, narm = TRUE, stable_algo = TRUE, sd = TRUE, drop = TRUE) { .Call(Cpp_fvarsdm, x, ng, g, gs, w, narm, stable_algo, sd, drop) } fvarsdlCpp <- function(x, ng = 0L, g = 0L, gs = NULL, w = NULL, narm = TRUE, stable_algo = TRUE, sd = TRUE, drop = TRUE) { .Call(Cpp_fvarsdl, x, ng, g, gs, w, narm, stable_algo, sd, drop) } mrtl <- function(X, names = FALSE, return = "list") { switch(return, list = .Call(Cpp_mrtl, X, names, 0L), data.frame = .Call(Cpp_mrtl, X, names, 1L), data.table = alc(.Call(Cpp_mrtl, X, names, 2L)), stop("Unknown return option!")) } mctl <- function(X, names = FALSE, return = "list") { switch(return, list = .Call(Cpp_mctl, X, names, 0L), data.frame = .Call(Cpp_mctl, X, names, 1L), data.table = alc(.Call(Cpp_mctl, X, names, 2L)), stop("Unknown return option!")) } psmatCpp <- function(x, g, t = NULL, transpose = FALSE) { .Call(Cpp_psmat, x, g, t, transpose) } qFCpp <- function(x, ordered = TRUE, na_exclude = TRUE, keep_attr = TRUE, ret = 1L) { .Call(Cpp_qF, x, ordered, na_exclude, keep_attr, ret) } funiqueCpp <- function(x, sort = TRUE) { .Call(Cpp_funique, x, sort) } fdroplevelsCpp <- function(x, check_NA = TRUE) { .Call(Cpp_fdroplevels, x, check_NA) } setAttributes <- function(x, a) { .Call(C_setAttributes, x, a) } copyMostAttributes <- function(to, from) { .Call(C_copyMostAttributes, to, from) } setattributes <- function(x, a) { invisible(.Call(C_setattributes, x, a)) } duplAttributes <- function(x, y) { .Call(C_duplAttributes, x, y) } # No longer needed... # setattr <- function(x, a, v) { # invisible(.Call(C_setattr, x, a, v)) # } # duplattributes <- function(x, y) { # invisible(.Call(C_duplattributes, x, y)) # } # cond_duplAttributes <- function(x, y) { # .Call(C_cond_duplAttributes, x, y) # } # cond_duplattributes <- function(x, y) { # invisible(.Call(C_cond_duplattributes, x, y)) # } seqid <- function(x, o = NULL, del = 1L, start = 1L, na.skip = FALSE, skip.seq = FALSE, check.o = TRUE) { .Call(Cpp_seqid, x, o, del, start, na.skip, skip.seq, check.o) } groupid <- function(x, o = NULL, start = 1L, na.skip = FALSE, check.o = TRUE) { .Call(Cpp_groupid, x, o, start, na.skip, check.o) } collapse/R/fscale.R0000644000176200001440000002160514174223734013650 0ustar liggesusers # Make faster ? cm <- function(x) if(is.double(x)) x else if(is.character(x) && x == "overall.mean") -Inf else if(isFALSE(x)) Inf else stop("mean must be a number, 'overall.mean' or FALSE") csd <- function(x) if(is.double(x)) x else if(is.character(x) && x == "within.sd") -Inf else stop("sd must be a number or 'within.sd'") # TODO: w.type - Implement reliability weights? fscale <- function(x, ...) UseMethod("fscale") # , x fscale.default <- function(x, g = NULL, w = NULL, na.rm = TRUE, mean = 0, sd = 1, ...) { if(is.matrix(x) && !inherits(x, "matrix")) return(fscale.matrix(x, g, w, na.rm, mean, sd, ...)) if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(g)) return(.Call(Cpp_fscale,x,0L,0L,w,na.rm,cm(mean),csd(sd))) g <- G_guo(g) .Call(Cpp_fscale,x,g[[1L]],g[[2L]],w,na.rm,cm(mean),csd(sd)) } fscale.pseries <- function(x, effect = 1L, w = NULL, na.rm = TRUE, mean = 0, sd = 1, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) g <- if(length(effect) == 1L) .subset2(getpix(attr(x, "index")), effect) else finteraction(.subset(getpix(attr(x, "index")), effect)) .Call(Cpp_fscale,x,fnlevels(g),g,w,na.rm,cm(mean),csd(sd)) } fscale.matrix <- function(x, g = NULL, w = NULL, na.rm = TRUE, mean = 0, sd = 1, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(g)) return(.Call(Cpp_fscalem,x,0L,0L,w,na.rm,cm(mean),csd(sd))) g <- G_guo(g) .Call(Cpp_fscalem,x,g[[1L]],g[[2L]],w,na.rm,cm(mean),csd(sd)) } fscale.grouped_df <- function(x, w = NULL, na.rm = TRUE, mean = 0, sd = 1, keep.group_vars = TRUE, keep.w = TRUE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) g <- GRP.grouped_df(x, call = FALSE) wsym <- l1orn(as.character(substitute(w)), NULL) nam <- attr(x, "names") gn2 <- which(nam %in% g[[5L]]) gn <- if(keep.group_vars) gn2 else NULL if(length(wsym) && length(wn <- whichv(nam, wsym))) { w <- .subset2(x, wn) if(any(gn2 == wn)) stop("Weights coincide with grouping variables!") gn2 <- c(gn2, wn) if(keep.w) gn <- c(gn, wn) } if(length(gn2)) { # if(!length(gn)) return(.Call(Cpp_fscalel,x[-gn2],g[[1L]],g[[2L]],w,na.rm,cm(mean),csd(sd))) ax <- attributes(x) ax[["names"]] <- c(nam[gn], nam[-gn2]) # first term is removed if !length(gn) res <- .Call(Cpp_fscalel, .subset(x, -gn2), g[[1L]],g[[2L]],w,na.rm,cm(mean),csd(sd)) if(length(gn)) return(setAttributes(c(.subset(x, gn), res), ax)) else return(setAttributes(res, ax)) } .Call(Cpp_fscalel,x,g[[1L]],g[[2L]],w,na.rm,cm(mean),csd(sd)) } fscale.data.frame <- function(x, g = NULL, w = NULL, na.rm = TRUE, mean = 0, sd = 1, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(g)) return(.Call(Cpp_fscalel,x,0L,0L,w,na.rm,cm(mean),csd(sd))) g <- G_guo(g) .Call(Cpp_fscalel,x,g[[1L]],g[[2L]],w,na.rm,cm(mean),csd(sd)) } fscale.list <- function(x, g = NULL, w = NULL, na.rm = TRUE, mean = 0, sd = 1, ...) fscale.data.frame(x, g, w, na.rm, mean, sd, ...) fscale.pdata.frame <- function(x, effect = 1L, w = NULL, na.rm = TRUE, mean = 0, sd = 1, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) g <- if(length(effect) == 1L) .subset2(getpix(attr(x, "index")), effect) else finteraction(.subset(getpix(attr(x, "index")), effect)) .Call(Cpp_fscale,x,fnlevels(g),g,w,na.rm,cm(mean),csd(sd)) } # Standardization Operator STD <- function(x, ...) UseMethod("STD") # , x STD.default <- function(x, g = NULL, w = NULL, na.rm = TRUE, mean = 0, sd = 1, ...) { if(is.matrix(x) && !inherits(x, "matrix")) return(STD.matrix(x, g, w, na.rm, mean, sd, ...)) fscale.default(x, g, w, na.rm, mean, sd, ...) } STD.pseries <- function(x, effect = 1L, w = NULL, na.rm = TRUE, mean = 0, sd = 1, ...) fscale.pseries(x, effect, w, na.rm, mean, sd, ...) STD.matrix <- function(x, g = NULL, w = NULL, na.rm = TRUE, mean = 0, sd = 1, stub = "STD.", ...) add_stub(fscale.matrix(x, g, w, na.rm, mean, sd, ...), stub) STD.grouped_df <- function(x, w = NULL, na.rm = TRUE, mean = 0, sd = 1, stub = "STD.", keep.group_vars = TRUE, keep.w = TRUE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) g <- GRP.grouped_df(x, call = FALSE) wsym <- l1orn(as.character(substitute(w)), NULL) nam <- attr(x, "names") gn2 <- which(nam %in% g[[5L]]) gn <- if(keep.group_vars) gn2 else NULL if(length(wsym) && length(wn <- whichv(nam, wsym))) { w <- .subset2(x, wn) if(any(gn2 == wn)) stop("Weights coincide with grouping variables!") gn2 <- c(gn2,wn) if(keep.w) gn <- c(gn,wn) } if(length(gn2)) { ax <- attributes(x) ax[["names"]] <- c(nam[gn], if(is.character(stub)) paste0(stub, nam[-gn2]) else nam[-gn2]) res <- .Call(Cpp_fscalel, .subset(x, -gn2), g[[1L]],g[[2L]],w,na.rm,cm(mean),csd(sd)) if(length(gn)) return(setAttributes(c(.subset(x, gn), res), ax)) else return(setAttributes(res, ax)) } add_stub(.Call(Cpp_fscalel,x,g[[1L]],g[[2L]],w,na.rm,cm(mean),csd(sd)), stub) } # updated (best) version ! STD.pdata.frame <- function(x, effect = 1L, w = NULL, cols = is.numeric, na.rm = TRUE, mean = 0, sd = 1, stub = "STD.", keep.ids = TRUE, keep.w = TRUE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) ax <- attributes(x) class(x) <- NULL nam <- names(x) g <- if(length(effect) == 1L) .subset2(getpix(ax[["index"]]), effect) else finteraction(.subset(getpix(ax[["index"]]), effect)) if(keep.ids) { gn <- which(nam %in% attr(getpix(ax[["index"]]), "names")) if(length(gn) && is.null(cols)) cols <- seq_along(x)[-gn] } else gn <- NULL if(length(cols)) cols <- cols2int(cols, x, nam) if(is.call(w)) { wn <- ckmatch(all.vars(w), nam, "Unknown weight variable:") w <- x[[wn]] cols <- if(is.null(cols)) seq_along(x)[-wn] else cols[cols != wn] if(keep.w) gn <- c(gn, wn) } if(length(gn) && length(cols)) { ax[["names"]] <- c(nam[gn], if(is.character(stub)) paste0(stub, nam[cols]) else nam[cols]) return(setAttributes(c(x[gn], .Call(Cpp_fscalel,x[cols],fnlevels(g),g,w,na.rm,cm(mean),csd(sd))), ax)) } if(!length(gn)) { ax[["names"]] <- if(is.character(stub)) paste0(stub, nam[cols]) else nam[cols] return(setAttributes(.Call(Cpp_fscalel,x[cols],fnlevels(g),g,w,na.rm,cm(mean),csd(sd)), ax)) } if(is.character(stub)) { ax[["names"]] <- paste0(stub, nam) return(setAttributes(.Call(Cpp_fscalel,x,fnlevels(g),g,w,na.rm,cm(mean),csd(sd)), ax)) } .Call(Cpp_fscalel,`oldClass<-`(x, ax[["class"]]),fnlevels(g),g,w,na.rm,cm(mean),csd(sd)) } # updated, fast and data.table proof version ! STD.data.frame <- function(x, by = NULL, w = NULL, cols = is.numeric, na.rm = TRUE, mean = 0, sd = 1, stub = "STD.", keep.by = TRUE, keep.w = TRUE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) if(is.call(by) || is.call(w)) { ax <- attributes(x) class(x) <- NULL nam <- names(x) if(is.call(by)) { if(length(by) == 3L) { cols <- ckmatch(all.vars(by[[2L]]), nam) gn <- ckmatch(all.vars(by[[3L]]), nam) } else { gn <- ckmatch(all.vars(by), nam) cols <- if(is.null(cols)) seq_along(x)[-gn] else cols2int(cols, x, nam) } by <- G_guo(if(length(gn) == 1L) x[[gn]] else x[gn]) if(!keep.by) gn <- NULL } else { gn <- NULL if(length(cols)) cols <- cols2int(cols, x, nam) by <- if(is.null(by)) list(0L, 0L) else G_guo(by) } if(is.call(w)) { wn <- ckmatch(all.vars(w), nam, "Unknown weight variable:") w <- x[[wn]] cols <- if(is.null(cols)) seq_along(x)[-wn] else cols[cols != wn] if(keep.w) gn <- c(gn, wn) } if(length(gn)) { ax[["names"]] <- c(nam[gn], if(is.character(stub)) paste0(stub, nam[cols]) else nam[cols]) return(setAttributes(c(x[gn], .Call(Cpp_fscalel,x[cols],by[[1L]],by[[2L]],w,na.rm,cm(mean),csd(sd))), ax)) } ax[["names"]] <- if(is.character(stub)) paste0(stub, nam[cols]) else nam[cols] return(setAttributes(.Call(Cpp_fscalel,x[cols],by[[1L]],by[[2L]],w,na.rm,cm(mean),csd(sd)), ax)) } else if(length(cols)) { # Needs to be like this, otherwise subsetting dropps the attributes !! ax <- attributes(x) class(x) <- NULL x <- x[cols2int(cols, x, names(x), FALSE)] ax[["names"]] <- names(x) setattributes(x, ax) } if(is.character(stub)) attr(x, "names") <- paste0(stub, attr(x, "names")) if(is.null(by)) return(.Call(Cpp_fscalel,x,0L,0L,w,na.rm,cm(mean),csd(sd))) by <- G_guo(by) .Call(Cpp_fscalel,x,by[[1L]],by[[2L]],w,na.rm,cm(mean),csd(sd)) } STD.list <- function(x, by = NULL, w = NULL, cols = is.numeric, na.rm = TRUE, mean = 0, sd = 1, stub = "STD.", keep.by = TRUE, keep.w = TRUE, ...) STD.data.frame(x, by, w, cols, na.rm, mean, sd, stub, keep.by, keep.w, ...) collapse/R/ffirst.R0000644000176200001440000001253514172367040013707 0ustar liggesusers # Note: for foundational changes to this code see fsum.R ffirst <- function(x, ...) UseMethod("ffirst") # , x ffirst.default <- function(x, g = NULL, TRA = NULL, na.rm = TRUE, use.g.names = TRUE, ...) { if(is.matrix(x) && !inherits(x, "matrix")) return(ffirst.matrix(x, g, TRA, na.rm, use.g.names, ...)) if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(TRA)) { if(is.null(g)) return(.Call(C_ffirst,x,0L,0L,na.rm)) if(is.atomic(g)) { if(use.g.names) { if(!is.nmfactor(g)) g <- qF(g, na.exclude = FALSE) lev <- attr(g, "levels") return(`names<-`(.Call(C_ffirst,x,length(lev),g,na.rm), lev)) } if(is.nmfactor(g)) return(.Call(C_ffirst,x,fnlevels(g),g,na.rm)) g <- qG(g, na.exclude = FALSE) return(.Call(C_ffirst,x,attr(g,"N.groups"),g,na.rm)) } if(!is_GRP(g)) g <- GRP.default(g, return.groups = use.g.names, call = FALSE) if(use.g.names) return(`names<-`(.Call(C_ffirst,x,g[[1L]],g[[2L]],na.rm), GRPnames(g))) return(.Call(C_ffirst,x,g[[1L]],g[[2L]],na.rm)) } if(is.null(g)) return(.Call(Cpp_TRA,x,.Call(C_ffirst,x,0L,0L,na.rm),0L,TtI(TRA))) g <- G_guo(g) .Call(Cpp_TRA,x,.Call(C_ffirst,x,g[[1L]],g[[2L]],na.rm),g[[2L]],TtI(TRA)) } ffirst.matrix <- function(x, g = NULL, TRA = NULL, na.rm = TRUE, use.g.names = TRUE, drop = TRUE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(TRA)) { if(is.null(g)) return(.Call(C_ffirstm,x,0L,0L,na.rm,drop)) if(is.atomic(g)) { if(use.g.names) { if(!is.nmfactor(g)) g <- qF(g, na.exclude = FALSE) lev <- attr(g, "levels") return(`dimnames<-`(.Call(C_ffirstm,x,length(lev),g,na.rm,FALSE), list(lev, dimnames(x)[[2L]]))) } if(is.nmfactor(g)) return(.Call(C_ffirstm,x,fnlevels(g),g,na.rm,FALSE)) g <- qG(g, na.exclude = FALSE) return(.Call(C_ffirstm,x,attr(g,"N.groups"),g,na.rm,FALSE)) } if(!is_GRP(g)) g <- GRP.default(g, return.groups = use.g.names, call = FALSE) if(use.g.names) return(`dimnames<-`(.Call(C_ffirstm,x,g[[1L]],g[[2L]],na.rm,FALSE), list(GRPnames(g), dimnames(x)[[2L]]))) return(.Call(C_ffirstm,x,g[[1L]],g[[2L]],na.rm,FALSE)) } if(is.null(g)) return(.Call(Cpp_TRAm,x,.Call(C_ffirstm,x,0L,0L,na.rm,TRUE),0L,TtI(TRA))) g <- G_guo(g) .Call(Cpp_TRAm,x,.Call(C_ffirstm,x,g[[1L]],g[[2L]],na.rm,FALSE),g[[2L]],TtI(TRA)) } ffirst.data.frame <- function(x, g = NULL, TRA = NULL, na.rm = TRUE, use.g.names = TRUE, drop = TRUE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(TRA)) { if(is.null(g)) if(drop) return(unlist(.Call(C_ffirstl,x,0L,0L,na.rm))) else return(.Call(C_ffirstl,x,0L,0L,na.rm)) if(is.atomic(g)) { if(use.g.names && !inherits(x, "data.table")) { if(!is.nmfactor(g)) g <- qF(g, na.exclude = FALSE) lev <- attr(g, "levels") return(setRnDF(.Call(C_ffirstl,x,length(lev),g,na.rm), lev)) } if(is.nmfactor(g)) return(.Call(C_ffirstl,x,fnlevels(g),g,na.rm)) g <- qG(g, na.exclude = FALSE) return(.Call(C_ffirstl,x,attr(g,"N.groups"),g,na.rm)) } if(!is_GRP(g)) g <- GRP.default(g, return.groups = use.g.names, call = FALSE) if(use.g.names && !inherits(x, "data.table") && length(groups <- GRPnames(g))) return(setRnDF(.Call(C_ffirstl,x,g[[1L]],g[[2L]],na.rm), groups)) return(.Call(C_ffirstl,x,g[[1L]],g[[2L]],na.rm)) } if(is.null(g)) return(.Call(Cpp_TRAl,x,.Call(C_ffirstl,x,0L,0L,na.rm),0L,TtI(TRA))) g <- G_guo(g) .Call(Cpp_TRAl,x,.Call(C_ffirstl,x,g[[1L]],g[[2L]],na.rm),g[[2L]],TtI(TRA)) } ffirst.list <- function(x, g = NULL, TRA = NULL, na.rm = TRUE, use.g.names = TRUE, drop = TRUE, ...) ffirst.data.frame(x, g, TRA, na.rm, use.g.names, drop, ...) ffirst.grouped_df <- function(x, TRA = NULL, na.rm = TRUE, use.g.names = FALSE, keep.group_vars = TRUE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) g <- GRP.grouped_df(x, call = FALSE) nam <- attr(x, "names") gn <- which(nam %in% g[[5L]]) nTRAl <- is.null(TRA) gl <- length(gn) > 0L if(gl || nTRAl) { ax <- attributes(x) attributes(x) <- NULL if(nTRAl) { ax[["groups"]] <- NULL ax[["class"]] <- fsetdiff(ax[["class"]], c("GRP_df", "grouped_df")) ax[["row.names"]] <- if(use.g.names) GRPnames(g) else .set_row_names(g[[1L]]) if(gl) { if(keep.group_vars) { ax[["names"]] <- c(g[[5L]], nam[-gn]) return(setAttributes(c(g[[4L]],.Call(C_ffirstl,x[-gn],g[[1L]],g[[2L]],na.rm)), ax)) } ax[["names"]] <- nam[-gn] return(setAttributes(.Call(C_ffirstl,x[-gn],g[[1L]],g[[2L]],na.rm), ax)) } else if(keep.group_vars) { ax[["names"]] <- c(g[[5L]], nam) return(setAttributes(c(g[[4L]],.Call(C_ffirstl,x,g[[1L]],g[[2L]],na.rm)), ax)) } else return(setAttributes(.Call(C_ffirstl,x,g[[1L]],g[[2L]],na.rm), ax)) } else if(keep.group_vars) { ax[["names"]] <- c(nam[gn], nam[-gn]) return(setAttributes(c(x[gn],.Call(Cpp_TRAl,x[-gn],.Call(C_ffirstl,x[-gn],g[[1L]],g[[2L]],na.rm),g[[2L]],TtI(TRA))), ax)) } ax[["names"]] <- nam[-gn] return(setAttributes(.Call(Cpp_TRAl,x[-gn],.Call(C_ffirstl,x[-gn],g[[1L]],g[[2L]],na.rm),g[[2L]],TtI(TRA)), ax)) } else return(.Call(Cpp_TRAl,x,.Call(C_ffirstl,x,g[[1L]],g[[2L]],na.rm),g[[2L]],TtI(TRA))) } collapse/R/quick_conversion.R0000644000176200001440000003361114167332054015772 0ustar liggesusers qDF <- function(X, row.names.col = FALSE, keep.attr = FALSE, class = "data.frame") { if(is.atomic(X)) { d <- dim(X) ld <- length(d) if(ld > 1L) { if(ld > 2L) { dn <- dimnames(X) dim(X) <- c(d[1L], bprod(d[-1L])) if(length(dn)) { for (i in 2L:ld) if(is.null(dn[[i]])) dn[[i]] <- seq_len(d[i]) dimnames(X) <- list(dn[[1L]], interact_names(dn[-1L])) # Good? } } if(!isFALSE(row.names.col) && length(force(dn <- dimnames(X))[[1L]])) { res <- c(list(dn[[1L]]), .Call(Cpp_mctl, X, FALSE, 0L)) names(res) <- c(if(is.character(row.names.col)) row.names.col[1L] else "row.names", dn[[2L]]) attr(res, "row.names") <- .set_row_names(length(dn[[1L]])) } else res <- .Call(Cpp_mctl, X, TRUE, 1L) oldClass(res) <- if(length(class)) class else "data.frame" if(!keep.attr) return(res) ax <- attributes(X) axoth <- names(ax) %!in% c("dim", "dimnames", "class") if(any(axoth)) return(addAttributes(res, ax[axoth])) else return(res) } nam <- names(X) if(is.null(nam) || isFALSE(row.names.col)) { if(is.null(nam)) { res <- `names<-`(list(X), l1orlst(as.character(substitute(X)))) attr(res, "row.names") <- .set_row_names(length(X)) } else { res <- `names<-`(list(`names<-`(X, NULL)), l1orlst(as.character(substitute(X)))) attr(res, "row.names") <- nam } } else { res <- list(nam, `names<-`(X, NULL)) names(res) <- c(if(is.character(row.names.col)) row.names.col[1L] else "row.names", l1orlst(as.character(substitute(X)))) attr(res, "row.names") <- .set_row_names(length(X)) } return(`oldClass<-`(res, if(length(class)) class else "data.frame")) } if(keep.attr) { # if(all(class(X) == class)) return(X) # better adjust rows ? -> yes, row.names.col should always work ! if(is.null(attr(X, "names"))) attr(X, "names") <- paste0("V", seq_along(unclass(X))) if(is.null(attr(X, "row.names"))) { attr(X, "row.names") <- .set_row_names(length(.subset2(X, 1L))) } else if(!isFALSE(row.names.col)) { ax <- attributes(X) X <- c(list(ax[["row.names"]]), X) ax[["row.names"]] <- .set_row_names(length(X[[1L]])) # this is ok, X is a list ... ax[["names"]] <- c(if(is.character(row.names.col)) row.names.col[1L] else "row.names", ax[["names"]]) setattributes(X, ax) } if(length(class)) return(`oldClass<-`(X, class)) if(inherits(X, "data.frame")) return(X) return(`oldClass<-`(X, "data.frame")) } nam <- attr(X, "names") rn <- attr(X, "row.names") attributes(X) <- NULL if(is.null(nam)) nam <- paste0("V", seq_along(X)) if(is.null(rn) || is.numeric(rn)) { rn <- .set_row_names(length(X[[1L]])) } else if(!isFALSE(row.names.col)) { X <- c(list(rn), X) rn <- .set_row_names(length(X[[1L]])) nam <- c(if(is.character(row.names.col)) row.names.col[1L] else "row.names", nam) } # slower: !! # setAttributes(X, pairlist(names = nam, row.names = rn, class = if(length(class)) class else "data.frame")) names(X) <- nam attr(X, "row.names") <- rn # This can be inefficient for large data.frames if character rn !! oldClass(X) <- if(length(class)) class else "data.frame" X } qDT_raw <- function(X, row.names.col, keep.attr, DT_class, X_nam) { if(is.atomic(X)) { d <- dim(X) ld <- length(d) if(ld > 1L) { if(ld > 2L) { dn <- dimnames(X) dim(X) <- c(d[1L], bprod(d[-1L])) if(length(dn)) { for (i in 2L:ld) if(is.null(dn[[i]])) dn[[i]] <- seq_len(d[i]) dimnames(X) <- list(dn[[1L]], interact_names(dn[-1L])) # Good? } } if(!isFALSE(row.names.col) && length(force(dn <- dimnames(X))[[1L]])) { res <- c(list(dn[[1L]]), .Call(Cpp_mctl, X, FALSE, 0L)) names(res) <- c(if(is.character(row.names.col)) row.names.col[1L] else "row.names", dn[[2L]]) attr(res, "row.names") <- .set_row_names(length(dn[[1L]])) } else res <- .Call(Cpp_mctl, X, TRUE, 2L) oldClass(res) <- DT_class if(!keep.attr) return(res) ax <- attributes(X) axoth <- names(ax) %!in% c("dim", "dimnames", "class") return(if(any(axoth)) addAttributes(res, ax[axoth]) else res) } if(isFALSE(row.names.col) || is.null(nam <- names(X))) { res <- `names<-`(list(X), X_nam) } else { res <- list(nam, `names<-`(X, NULL)) names(res) <- c(if(is.character(row.names.col)) row.names.col[1L] else "row.names", X_nam) } attr(res, "row.names") <- .set_row_names(length(X)) return(`oldClass<-`(res, DT_class)) } if(keep.attr) { # if(all(class(X) == DT_class)) return(X) # better adjust rows ? -> yes, row.names.col should always work ! if(is.null(attr(X, "names"))) attr(X, "names") <- paste0("V", seq_along(unclass(X))) if(!isFALSE(row.names.col) && length(rn <- attr(X, "row.names"))) { ax <- attributes(X) X <- c(list(rn), X) ax[["names"]] <- c(if(is.character(row.names.col)) row.names.col[1L] else "row.names", ax[["names"]]) setattributes(X, ax) } if(!length(DT_class) && inherits(X, c("data.table", "data.frame"))) return(X) attr(X, "row.names") <- .set_row_names(length(.subset2(X, 1L))) } else { nam <- attr(X, "names") rncol <- !isFALSE(row.names.col) && length(rn <- attr(X, "row.names")) attributes(X) <- NULL if(is.null(nam)) nam <- paste0("V", seq_along(X)) if(rncol) { X <- c(list(rn), X) nam <- c(if(is.character(row.names.col)) row.names.col[1L] else "row.names", nam) } names(X) <- nam attr(X, "row.names") <- .set_row_names(length(X[[1L]])) } return(`oldClass<-`(X, DT_class)) } qDT <- function(X, row.names.col = FALSE, keep.attr = FALSE, class = c("data.table", "data.frame")) { alc(qDT_raw(X, row.names.col, keep.attr, if(length(class) || keep.attr) class else c("data.table", "data.frame"), if(is.atomic(X) && !is.matrix(X)) l1orlst(as.character(substitute(X))) else NULL)) } qTBL <- function(X, row.names.col = FALSE, keep.attr = FALSE, class = c("tbl_df", "tbl", "data.frame")) { qDT_raw(X, row.names.col, keep.attr, if(length(class) || keep.attr) class else c("tbl_df", "tbl", "data.frame"), if(is.atomic(X) && !is.matrix(X)) l1orlst(as.character(substitute(X))) else NULL) } qM <- function(X, keep.attr = FALSE, class = NULL) { if(keep.attr) { if(is.atomic(X)) { if(length(class)) oldClass(X) <- class if(is.matrix(X)) return(X) if(is.array(X)) { d <- dim(X) dn <- dimnames(X) dim(X) <- c(d[1L], bprod(d[-1L])) if(length(dn)) { for (i in 2L:length(d)) if(is.null(dn[[i]])) dn[[i]] <- seq_len(d[i]) dimnames(X) <- list(dn[[1L]], interact_names(dn[-1L])) # Good? } } else { nam <- l1orlst(as.character(substitute(X))) # needed before X is changed !! dim(X) <- c(length(X), 1L) dimnames(X) <- list(names(X), nam) names(X) <- NULL # if(is.object(X)) oldClass(X) <- NULL Necessary ? Can also have factor or date matrices. Check this ! # -> qM(wlddev$date, TRUE) is a vector !! } return(X) } ax <- attributes(X) res <- do.call(cbind, X) rn <- ax[["row.names"]] if(!(is.numeric(rn) || is.null(rn) || rn[1L] == "1")) dimnames(res) <- list(rn, ax[["names"]]) if(length(class)) oldClass(res) <- class axoth <- names(ax) %!in% c("names", "row.names", "class") if(any(axoth)) return(addAttributes(res, ax[axoth])) return(res) } if(is.atomic(X)) { if(!is.array(X)) { r <- matrix(X, ncol = 1, dimnames = list(names(X), l1orlst(as.character(substitute(X))))) if(is.null(class)) return(r) else return(`oldClass<-`(r, class)) } d <- dim(X) dn <- dimnames(X) attributes(X) <- NULL ld <- length(d) if(ld == 2L) { # setattributes(X, pairlist(dim = d, dimnames = dn)) # Not faster ! dim(X) <- d dimnames(X) <- dn } else { dim(X) <- c(d[1L], bprod(d[-1L])) if(length(dn)) { for (i in 2L:ld) if(is.null(dn[[i]])) dn[[i]] <- seq_len(d[i]) dimnames(X) <- list(dn[[1L]], interact_names(dn[-1L])) # Good? } } if(length(class)) oldClass(X) <- class return(X) } rn <- attr(X, "row.names") res <- do.call(cbind, X) if(is.object(res)) attributes(res) <- attributes(res)[c("dim", "dimnames")] # if X is list of time-series, do.call(cbind, X) creates ts-matrix. if(!(is.numeric(rn) || is.null(rn) || rn[1L] == "1")) dimnames(res) <- list(rn, attr(X, "names")) if(length(class)) oldClass(res) <- class res } # Same speed # tf1 <- function(res) { # res <- do.call(cbind, res) # if(is.object(res)) attributes(res) <- attributes(res)[c("dim", "dimnames")] # res # } # # tf2 <- function(res) { # res <- do.call(cbind, res) # if(is.object(res)) setAttributes(res, attributes(res)[c("dim", "dimnames")]) # } ## Old Versions: # # before collapse 1.4.0: # qM_old <- function(X) { # if(is.atomic(X)) { # d <- dim(X) # ld <- length(d) # if(ld > 2L) { # dn <- dimnames(X) # dim(X) <- c(d[1L], bprod(d[-1L])) # if(length(dn)) { # for (i in 2L:ld) if(is.null(dn[[i]])) dn[[i]] <- seq_len(d[i]) # dimnames(X) <- list(dn[[1L]], interact_names(dn[-1L])) # Good? # } # return(X) # } # if(ld == 2L) return(X) # return(matrix(X, ncol = 1, dimnames = list(names(X), l1orlst(as.character(substitute(X)))))) # } # rn <- attr(X, "row.names") # res <- do.call(cbind, X) # if(!(is.numeric(rn) || is.null(rn) || rn[1L] == "1")) dimnames(res) <- list(rn, attr(X, "names")) # res # } # # before collapse 1.4.0: # qDF <- function(X, row.names.col = FALSE) { # if(is.atomic(X)) { # d <- dim(X) # ld <- length(d) # if(ld >= 2L) { # if(ld != 2L) { # dn <- dimnames(X) # dim(X) <- c(d[1L], bprod(d[-1L])) # if(length(dn)) { # for (i in 2L:ld) if(is.null(dn[[i]])) dn[[i]] <- seq_len(d[i]) # dimnames(X) <- list(dn[[1L]], interact_names(dn[-1L])) # Good? # } # } # if(!isFALSE(row.names.col) && length(force(dn <- dimnames(X))[[1L]])) { # res <- c(list(dn[[1L]]), .Call(Cpp_mctl, X, FALSE, 0L)) # names(res) <- c(if(is.character(row.names.col)) row.names.col[1L] else "row.names", dn[[2L]]) # attr(res, "row.names") <- .set_row_names(length(dn[[1L]])) # return(`oldClass<-`(res, "data.frame")) # } # return(.Call(Cpp_mctl, X, TRUE, 1L)) # } # nam <- names(X) # if(isFALSE(row.names.col) || is.null(nam)) { # if(is.null(nam)) { # res <- `names<-`(list(X), l1orlst(as.character(substitute(X)))) # attr(res, "row.names") <- .set_row_names(length(X)) # } else { # res <- `names<-`(list(`names<-`(X, NULL)), l1orlst(as.character(substitute(X)))) # attr(res, "row.names") <- nam # } # } else { # res <- list(nam, `names<-`(X, NULL)) # names(res) <- c(if(is.character(row.names.col)) row.names.col[1L] else "row.names", l1orlst(as.character(substitute(X)))) # attr(res, "row.names") <- .set_row_names(length(X)) # } # return(`oldClass<-`(res, "data.frame")) # } # # if(inherits(X, "data.frame")) return(X) # if(is.null(attr(X, "names"))) attr(X, "names") <- paste0("V", seq_along(unclass(X))) # if(is.null(attr(X, "row.names"))) { # attr(X, "row.names") <- .set_row_names(fnrow2(X)) # } else if(!isFALSE(row.names.col)) { # ax <- attributes(X) # X <- c(list(ax[["row.names"]]), X) # best ?? # ax[["row.names"]] <- .set_row_names(length(X[[1L]])) # this is ok, X is a list ... # ax[["names"]] <- c(if(is.character(row.names.col)) row.names.col[1L] else "row.names", ax[["names"]]) # setattributes(X, ax) # } # oldClass(X) <- "data.frame" # X # } # # before collapse 1.4.0: # qDT <- function(X, row.names.col = FALSE) { # if(is.atomic(X)) { # d <- dim(X) # ld <- length(d) # if(ld >= 2L) { # if(ld != 2L) { # dn <- dimnames(X) # dim(X) <- c(d[1L], bprod(d[-1L])) # if(length(dn)) { # for (i in 2L:ld) if(is.null(dn[[i]])) dn[[i]] <- seq_len(d[i]) # dimnames(X) <- list(dn[[1L]], interact_names(dn[-1L])) # Good? # } # } # if(!isFALSE(row.names.col) && length(force(dn <- dimnames(X))[[1L]])) { # res <- c(list(dn[[1L]]), .Call(Cpp_mctl, X, FALSE, 0L)) # names(res) <- c(if(is.character(row.names.col)) row.names.col[1L] else "row.names", dn[[2L]]) # attr(res, "row.names") <- .set_row_names(length(dn[[1L]])) # return(`oldClass<-`(res, c("data.table","data.frame"))) # } # return(.Call(Cpp_mctl, X, TRUE, 2L)) # } # if(isFALSE(row.names.col) || is.null(nam <- names(X))) { # res <- `names<-`(list(X), l1orlst(as.character(substitute(X)))) # } else { # res <- list(nam, `names<-`(X, NULL)) # names(res) <- c(if(is.character(row.names.col)) row.names.col[1L] else "row.names", l1orlst(as.character(substitute(X)))) # } # attr(res, "row.names") <- .set_row_names(length(X)) # return(`oldClass<-`(res, c("data.table","data.frame"))) # } # if(inherits(X, "data.table")) return(X) # if(is.null(attr(X, "names"))) attr(X, "names") <- paste0("V", seq_along(unclass(X))) # if(!isFALSE(row.names.col) && length(rn <- attr(X, "row.names"))) { # ax <- attributes(X) # X <- c(list(rn), X) # ax[["names"]] <- c(if(is.character(row.names.col)) row.names.col[1L] else "row.names", ax[["names"]]) # setattributes(X, ax) # } # attr(X, "row.names") <- .set_row_names(fnrow2(X)) # oldClass(X) <- c("data.table","data.frame") # X # } collapse/R/flm.R0000644000176200001440000001220314170063061013152 0ustar liggesusers # formatcoef <- function(r, X, y) { # if(!is.matrix(r)) dim(r) <- c(length(r), 1L) # `dimnames<-`(r, list(dimnames(X)[[2L]], if(is.matrix(y)) dimnames(y)[[2L]] else NULL)) # } # formatcoef <- function(r, y, X, drop) { # if(is.matrix(r)) return(`dimnames<-`(r, list(dimnames(X)[[2L]], if(is.matrix(y)) dimnames(y)[[2L]] else NULL))) # if(drop) return(name) # ..... # # # list(dim = c(dim(X)[2L], 1L), dimnames = list(dimnames(X)[[2L]], NULL)) # } flm <- function(y, X, w = NULL, add.icpt = FALSE, # sparse = FALSE, return.raw = FALSE, # only.coef method = c("lm", "solve", "qr", "arma", "chol", "eigen"), eigen.method = 3L, ...) { if(add.icpt) X <- cbind(`(Intercept)` = 1, X) n <- dim(X)[1L] if(n != NROW(y)) stop("NROW(y) must match nrow(X)") # if(sparse) X <- as(X, "dgCMatrix") # what about y ?? if(length(w)) { if(length(w) != n) stop("w must be numeric and length(w) == nrow(X)") wts <- sqrt(w) if(return.raw) return(switch(method[1L], lm = { z <- .lm.fit(X * wts, y * wts, ...) z$residuals <- z$residuals / wts # This is correct !!! z }, solve = (function(xw) solve(crossprod(xw), crossprod(xw, y * wts), ...))(X * wts), qr = qr.coef(qr(X * wts, ...), y * wts), arma = getenvFUN("RcppArmadillo_fastLmPure")(X * wts, y * wts), # .Call("_RcppArmadillo_fastLm_impl", X * wts, y * wts, PACKAGE = "RcppArmadillo"), chol = (function(xw) chol2inv(chol(crossprod(xw), ...)) %*% crossprod(xw, y * wts))(X * wts), eigen = { z <- getenvFUN("RcppEigen_fastLmPure")(X * wts, y * wts, eigen.method) # .Call("RcppEigen_fastLm_Impl", X * wts, y * wts, eigen.method, PACKAGE = "RcppEigen") z$residuals <- z$residuals / wts # This is correct !!! z$fitted.values <- y - z$residuals z }, stop("Unknown method!"))) ar <- if(is.matrix(y)) list(dim = c(dim(X)[2L], dim(y)[2L]), dimnames = list(dimnames(X)[[2L]], dimnames(y)[[2L]])) else list(dim = c(dim(X)[2L], 1L), dimnames = list(dimnames(X)[[2L]], NULL)) return(`attributes<-`(switch(method[1L], lm = .lm.fit(X * wts, y * wts, ...)[[2L]], solve = (function(xw) solve(crossprod(xw), crossprod(xw, y * wts), ...))(X * wts), qr = qr.coef(qr(`dimnames<-`(X, NULL) * wts, ...), y * wts), arma = getenvFUN("RcppArmadillo_fastLmPure")(X * wts, y * wts)[[1L]], # .Call("_RcppArmadillo_fastLm_impl", X * wts, y * wts, PACKAGE = "RcppArmadillo"), chol = (function(xw) chol2inv(chol(crossprod(xw), ...)) %*% crossprod(xw, y * wts))(X * wts), eigen = getenvFUN("RcppEigen_fastLmPure")(X * wts, y * wts, eigen.method)[[1L]], # .Call("RcppEigen_fastLm_Impl", X * wts, y * wts, eigen.method, PACKAGE = "RcppEigen") stop("Unknown method!")), ar)) } if(return.raw) return(switch(method[1L], lm = .lm.fit(X, y, ...), solve = solve(crossprod(X), crossprod(X, y), ...), qr = qr.coef(qr(X, ...), y), arma = getenvFUN("RcppArmadillo_fastLmPure")(X, y), chol = chol2inv(chol(crossprod(X), ...)) %*% crossprod(X, y), eigen = getenvFUN("RcppEigen_fastLmPure")(X, y, eigen.method), stop("Unknown method!"))) ar <- if(is.matrix(y)) list(dim = c(dim(X)[2L], dim(y)[2L]), dimnames = list(dimnames(X)[[2L]], dimnames(y)[[2L]])) else list(dim = c(dim(X)[2L], 1L), dimnames = list(dimnames(X)[[2L]], NULL)) `attributes<-`(switch(method[1L], lm = .lm.fit(X, y, ...)[[2L]], solve = solve(crossprod(X), crossprod(X, y), ...), qr = qr.coef(qr(`dimnames<-`(X, NULL), ...), y), arma = getenvFUN("RcppArmadillo_fastLmPure")(X, y)[[1L]], chol = chol2inv(chol(crossprod(X), ...)) %*% crossprod(X, y), eigen = getenvFUN("RcppEigen_fastLmPure")(X, y, eigen.method)[[1L]], stop("Unknown method!")), ar) # if(!return.raw) return(switch(method[1L], solve = formatcoef(res$coefficients, X, y), res$coefficients)) # res } # Slower than using chol2inv (discarded) # lmchol2 <- function(X, y) { # ch <- chol(crossprod(X)) # backsolve(ch, forwardsolve(ch, crossprod(X, y), upper = TRUE, trans = TRUE)) # } # getDLLRegisteredRoutines("RcppArmadillo") # # identical(getNativeSymbolInfo("_RcppArmadillo_fastLm_impl", PACKAGE = "RcppArmadillo"), # get0("_RcppArmadillo_fastLm_impl", envir = getNamespace("RcppArmadillo"))) # # # microbenchmark::microbenchmark(A = getNativeSymbolInfo("_RcppArmadillo_fastLm_impl", PACKAGE = "RcppArmadillo"), # B = get0("_RcppArmadillo_fastLm_impl", envir = getNamespace("RcppArmadillo"))) # # .Call(get0("_RcppArmadillo_fastLm_impl", envir = getNamespace("RcppArmadillo")), X, y) # # .Call("_RcppArmadillo_fastLm_impl", X, y, PACKAGE = "RcppArmadillo") collapse/R/fdiff_fgrowth.R0000644000176200001440000004412214174223734015230 0ustar liggesusers # For principle innovations of this code see flag.R and flag.cpp # Helper functions checkld <- function(...) { if(any(names(list(...)) == "logdiff")) { warning("argument 'logdiff' was renamed to 'log'") TRUE } else FALSE } baselog <- base::log fdiff <- function(x, n = 1, diff = 1, ...) UseMethod("fdiff") # , x fdiff.default <- function(x, n = 1, diff = 1, g = NULL, t = NULL, fill = NA, log = FALSE, rho = 1, stubs = TRUE, ...) { if(is.matrix(x) && !inherits(x, "matrix")) return(UseMethod("fdiff", unclass(x))) if(!missing(...)) if(checkld(...)) log <- list(...)[["logdiff"]] else unused_arg_action(match.call(), ...) if(log) x <- baselog(x) if(is.null(g)) return(.Call(Cpp_fdiffgrowth,x,n,diff,fill,0L,0L,NULL,G_t(t),1L+log,rho,stubs,1)) g <- G_guo(g) .Call(Cpp_fdiffgrowth,x,n,diff,fill,g[[1L]],g[[2L]],g[[3L]],G_t(t),1L+log,rho,stubs,1) } fdiff.pseries <- function(x, n = 1, diff = 1, fill = NA, log = FALSE, rho = 1, stubs = TRUE, ...) { if(!missing(...)) if(checkld(...)) log <- list(...)[["logdiff"]] else unused_arg_action(match.call(), ...) if(log) x <- baselog(x) index <- unclass(getpix(attr(x, "index"))) if(length(index) > 2L) index <- list(finteraction(index[-length(index)]), index[[length(index)]]) g <- index[[1L]] t <- index[[2L]] tlev <- attr(t, "levels") oldopts <- options(warn = -1L) on.exit(options(oldopts)) if(is.finite(as.integer(tlev[1L]))) t <- as.integer(tlev)[t] if(is.matrix(x)) .Call(Cpp_fdiffgrowthm,x,n,diff,fill,fnlevels(g),g,NULL,t,1L+log,rho,stubs,1) else .Call(Cpp_fdiffgrowth,x,n,diff,fill,fnlevels(g),g,NULL,t,1L+log,rho,stubs,1) } fdiff.matrix <- function(x, n = 1, diff = 1, g = NULL, t = NULL, fill = NA, log = FALSE, rho = 1, stubs = length(n) + length(diff) > 2L, ...) { if(!missing(...)) if(checkld(...)) log <- list(...)[["logdiff"]] else unused_arg_action(match.call(), ...) if(log) x <- baselog(x) if(is.null(g)) return(.Call(Cpp_fdiffgrowthm,x,n,diff,fill,0L,0L,NULL,G_t(t),1L+log,rho,stubs,1)) g <- G_guo(g) .Call(Cpp_fdiffgrowthm,x,n,diff,fill,g[[1L]],g[[2L]],g[[3L]],G_t(t),1L+log,rho,stubs,1) } fdiff.grouped_df <- function(x, n = 1, diff = 1, t = NULL, fill = NA, log = FALSE, rho = 1, stubs = length(n) + length(diff) > 2L, keep.ids = TRUE, ...) { if(!missing(...)) if(checkld(...)) log <- list(...)[["logdiff"]] else unused_arg_action(match.call(), ...) g <- GRP.grouped_df(x, call = FALSE) tsym <- all.vars(substitute(t)) nam <- attr(x, "names") gn <- which(nam %in% g[[5L]]) if(length(tsym) && !anyNA(tn <- match(tsym, nam))) { if(length(tn) == 1L) { if(any(gn == tn)) stop("timevar coincides with grouping variables!") t <- .subset2(x, tn) } else { if(any(gn %in% tn)) stop("timevar coincides with grouping variables!") t <- .subset(x, tn) } gn <- c(gn, tn) } cld <- function(x) if(log) fdapply(x, baselog) else x if(length(gn)) { ax <- attributes(x) res <- .Call(Cpp_fdiffgrowthl,cld(.subset(x, -gn)),n,diff,fill,g[[1L]],g[[2L]],g[[3L]],G_t(t),1L+log,rho,stubs,1) if(keep.ids) res <- c(.subset(x, gn), res) ax[["names"]] <- names(res) # Works for multiple lags / differences ! return(setAttributes(res, ax)) } .Call(Cpp_fdiffgrowthl,cld(x),n,diff,fill,g[[1L]],g[[2L]],g[[3L]],G_t(t),1L+log,rho,stubs,1) } fdiff.data.frame <- function(x, n = 1, diff = 1, g = NULL, t = NULL, fill = NA, log = FALSE, rho = 1, stubs = length(n) + length(diff) > 2L, ...) { if(!missing(...)) if(checkld(...)) log <- list(...)[["logdiff"]] else unused_arg_action(match.call(), ...) if(log) x <- fdapply(x, baselog) if(is.null(g)) return(.Call(Cpp_fdiffgrowthl,x,n,diff,fill,0L,0L,NULL,G_t(t),1L+log,rho,stubs,1)) g <- G_guo(g) .Call(Cpp_fdiffgrowthl,x,n,diff,fill,g[[1L]],g[[2L]],g[[3L]],G_t(t),1L+log,rho,stubs,1) } fdiff.list <- function(x, n = 1, diff = 1, g = NULL, t = NULL, fill = NA, log = FALSE, rho = 1, stubs = length(n) + length(diff) > 2L, ...) fdiff.data.frame(x, n, diff, g, t, fill, log, rho, stubs, ...) fdiff.pdata.frame <- function(x, n = 1, diff = 1, fill = NA, log = FALSE, rho = 1, stubs = length(n) + length(diff) > 2L, ...) { if(!missing(...)) if(checkld(...)) log <- list(...)[["logdiff"]] else unused_arg_action(match.call(), ...) if(log) x <- fdapply(x, baselog) index <- unclass(getpix(attr(x, "index"))) if(length(index) > 2L) index <- list(finteraction(index[-length(index)]), index[[length(index)]]) g <- index[[1L]] t <- index[[2L]] tlev <- attr(t, "levels") oldopts <- options(warn = -1L) on.exit(options(oldopts)) if(is.finite(as.integer(tlev[1L]))) t <- as.integer(tlev)[t] .Call(Cpp_fdiffgrowthl,x,n,diff,fill,fnlevels(g),g,NULL,t,1L+log,rho,stubs,1) } fgrowth <- function(x, n = 1, diff = 1, ...) UseMethod("fgrowth") # , x fgrowth.default <- function(x, n = 1, diff = 1, g = NULL, t = NULL, fill = NA, logdiff = FALSE, scale = 100, power = 1, stubs = TRUE, ...) { if(is.matrix(x) && !inherits(x, "matrix")) return(UseMethod("fgrowth", unclass(x))) if(!missing(...)) unused_arg_action(match.call(), ...) if(logdiff) x <- if(scale == 1) baselog(x) else scale * baselog(x) if(is.null(g)) return(.Call(Cpp_fdiffgrowth,x,n,diff,fill,0L,0L,NULL,G_t(t),4L-logdiff,scale,stubs,power)) g <- G_guo(g) .Call(Cpp_fdiffgrowth,x,n,diff,fill,g[[1L]],g[[2L]],g[[3L]],G_t(t),4L-logdiff,scale,stubs,power) } fgrowth.pseries <- function(x, n = 1, diff = 1, fill = NA, logdiff = FALSE, scale = 100, power = 1, stubs = TRUE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) if(logdiff) x <- if(scale == 1) baselog(x) else scale * baselog(x) index <- unclass(getpix(attr(x, "index"))) if(length(index) > 2L) index <- list(finteraction(index[-length(index)]), index[[length(index)]]) g <- index[[1L]] t <- index[[2L]] tlev <- attr(t, "levels") oldopts <- options(warn = -1L) on.exit(options(oldopts)) if(is.finite(as.integer(tlev[1L]))) t <- as.integer(tlev)[t] if(is.matrix(x)) .Call(Cpp_fdiffgrowthm,x,n,diff,fill,fnlevels(g),g,NULL,t,4L-logdiff,scale,stubs,power) else .Call(Cpp_fdiffgrowth,x,n,diff,fill,fnlevels(g),g,NULL,t,4L-logdiff,scale,stubs,power) } fgrowth.matrix <- function(x, n = 1, diff = 1, g = NULL, t = NULL, fill = NA, logdiff = FALSE, scale = 100, power = 1, stubs = length(n) + length(diff) > 2L, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) if(logdiff) x <- if(scale == 1) baselog(x) else scale * baselog(x) if(is.null(g)) return(.Call(Cpp_fdiffgrowthm,x,n,diff,fill,0L,0L,NULL,G_t(t),4L-logdiff,scale,stubs,power)) g <- G_guo(g) .Call(Cpp_fdiffgrowthm,x,n,diff,fill,g[[1L]],g[[2L]],g[[3L]],G_t(t),4L-logdiff,scale,stubs,power) } fgrowth.grouped_df <- function(x, n = 1, diff = 1, t = NULL, fill = NA, logdiff = FALSE, scale = 100, power = 1, stubs = length(n) + length(diff) > 2L, keep.ids = TRUE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) g <- GRP.grouped_df(x, call = FALSE) tsym <- all.vars(substitute(t)) nam <- attr(x, "names") gn <- which(nam %in% g[[5L]]) if(length(tsym) && !anyNA(tn <- match(tsym, nam))) { if(length(tn) == 1L) { if(any(gn == tn)) stop("timevar coincides with grouping variables!") t <- .subset2(x, tn) } else { if(any(gn %in% tn)) stop("timevar coincides with grouping variables!") t <- .subset(x, tn) } gn <- c(gn, tn) } cld <- function(x) if(!logdiff) x else if(scale != 1) fdapply(x, function(y) scale * baselog(y)) else fdapply(x, baselog) if(length(gn)) { ax <- attributes(x) res <- .Call(Cpp_fdiffgrowthl,cld(.subset(x, -gn)),n,diff,fill,g[[1L]],g[[2L]],g[[3L]],G_t(t),4L-logdiff,scale,stubs,power) if(keep.ids) res <- c(.subset(x, gn), res) ax[["names"]] <- names(res) # Works for multiple lags / differences ! return(setAttributes(res, ax)) } .Call(Cpp_fdiffgrowthl,cld(x),n,diff,fill,g[[1L]],g[[2L]],g[[3L]],G_t(t),4L-logdiff,scale,stubs,power) } fgrowth.data.frame <- function(x, n = 1, diff = 1, g = NULL, t = NULL, fill = NA, logdiff = FALSE, scale = 100, power = 1, stubs = length(n) + length(diff) > 2L, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) if(logdiff) x <- if(scale == 1) fdapply(x, baselog) else fdapply(x, function(y) scale * baselog(y)) if(is.null(g)) return(.Call(Cpp_fdiffgrowthl,x,n,diff,fill,0L,0L,NULL,G_t(t),4L-logdiff,scale,stubs,power)) g <- G_guo(g) .Call(Cpp_fdiffgrowthl,x,n,diff,fill,g[[1L]],g[[2L]],g[[3L]],G_t(t),4L-logdiff,scale,stubs,power) } fgrowth.list <- function(x, n = 1, diff = 1, g = NULL, t = NULL, fill = NA, logdiff = FALSE, scale = 100, power = 1, stubs = length(n) + length(diff) > 2L, ...) fgrowth.data.frame(x, n, diff, g, t, fill, logdiff, scale, power, stubs, ...) fgrowth.pdata.frame <- function(x, n = 1, diff = 1, fill = NA, logdiff = FALSE, scale = 100, power = 1, stubs = length(n) + length(diff) > 2L, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) if(logdiff) x <- if(scale == 1) fdapply(x, baselog) else fdapply(x, function(y) scale * baselog(y)) index <- unclass(getpix(attr(x, "index"))) if(length(index) > 2L) index <- list(finteraction(index[-length(index)]), index[[length(index)]]) g <- index[[1L]] t <- index[[2L]] tlev <- attr(t, "levels") oldopts <- options(warn = -1L) on.exit(options(oldopts)) if(is.finite(as.integer(tlev[1L]))) t <- as.integer(tlev)[t] .Call(Cpp_fdiffgrowthl,x,n,diff,fill,fnlevels(g),g,NULL,t,4L-logdiff,scale,stubs,power) } # Operator data frame methods templates DG_data_frame_template <- function(x, n = 1, diff = 1, by = NULL, t = NULL, cols = is.numeric, fill = NA, return = 1L, rho = 1, stubs = TRUE, keep.ids = TRUE, power = 1, ...) { # , message = 2L, power = 1 if(!missing(...)) unused_arg_action(match.call(), ...) cld <- function(y) switch(return, y, fdapply(y, baselog), if(rho == 1) fdapply(y, baselog) else fdapply(y, function(k) rho * baselog(k)), y) if(is.call(by) || is.call(t)) { ax <- attributes(x) class(x) <- NULL nam <- names(x) if(is.call(by)) { if(length(by) == 3L) { cols <- ckmatch(all.vars(by[[2L]]), nam) gn <- ckmatch(all.vars(by[[3L]]), nam) } else { gn <- ckmatch(all.vars(by), nam) cols <- if(is.null(cols)) seq_along(x)[-gn] else cols2int(cols, x, nam) } by <- G_guo(if(length(gn) == 1L) x[[gn]] else x[gn]) if(!keep.ids) gn <- NULL } else { gn <- NULL if(length(cols)) cols <- cols2int(cols, x, nam) by <- if(is.null(by)) list(0L, 0L, NULL) else G_guo(by) } if(is.call(t)) { tn <- ckmatch(all.vars(t), nam) t1 <- length(tn) == 1L t <- if(t1) x[[tn]] else x[tn] cols <- if(is.null(cols)) seq_along(x)[-tn] else if(t1) cols[cols != tn] else fsetdiff(cols, tn) if(keep.ids) gn <- c(gn, tn) } res <- if(length(gn)) c(x[gn], .Call(Cpp_fdiffgrowthl,cld(x[cols]),n,diff,fill,by[[1L]],by[[2L]],by[[3L]],G_t(t),return,rho,stubs,power)) else .Call(Cpp_fdiffgrowthl,cld(x[cols]),n,diff,fill,by[[1L]],by[[2L]],by[[3L]],G_t(t),return,rho,stubs,power) ax[["names"]] <- names(res) return(setAttributes(res, ax)) } else if(length(cols)) { # Needs to be done like this, otherwise list-subsetting drops attributes ! ax <- attributes(x) class(x) <- NULL x <- x[cols2int(cols, x, names(x), FALSE)] ax[["names"]] <- names(x) setattributes(x, ax) } if(is.null(by)) return(.Call(Cpp_fdiffgrowthl,cld(x),n,diff,fill,0L,0L,NULL,G_t(t),return,rho,stubs,power)) by <- G_guo(by) .Call(Cpp_fdiffgrowthl,cld(x),n,diff,fill,by[[1L]],by[[2L]],by[[3L]],G_t(t),return,rho,stubs,power) } DG_pdata_frame_template <- function(x, n = 1, diff = 1, cols = is.numeric, fill = NA, return = 1L, rho = 1, stubs = TRUE, keep.ids = TRUE, power = 1, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) ax <- attributes(x) nam <- ax[["names"]] index <- unclass(getpix(ax[["index"]])) if(keep.ids) { gn <- which(nam %in% names(index)) if(length(gn) && is.null(cols)) cols <- seq_along(unclass(x))[-gn] } else gn <- NULL if(length(index) > 2L) index <- list(finteraction(index[-length(index)]), index[[length(index)]]) g <- index[[1L]] t <- index[[2L]] tlev <- attr(t, "levels") oldopts <- options(warn = -1L) on.exit(options(oldopts)) if(is.finite(as.integer(tlev[1L]))) t <- as.integer(tlev)[t] cld <- function(y) switch(return, y, fdapply(y, baselog), if(rho == 1) fdapply(y, baselog) else fdapply(y, function(k) rho * baselog(k)), y) if(length(cols)) cols <- cols2int(cols, x, nam, FALSE) if(length(gn) && length(cols)) { class(x) <- NULL # Works for multiple lags ! res <- c(x[gn], .Call(Cpp_fdiffgrowthl,cld(x[cols]),n,diff,fill,fnlevels(g),g,NULL,t,return,rho,stubs,power)) ax[["names"]] <- names(res) return(setAttributes(res, ax)) } else if(!length(gn)) # could speed up ? return(.Call(Cpp_fdiffgrowthl,cld(fcolsubset(x, cols)),n,diff,fill,fnlevels(g),g,NULL,t,return,rho,stubs,power)) .Call(Cpp_fdiffgrowthl,cld(x),n,diff,fill,fnlevels(g),g,NULL,t,return,rho,stubs,power) } # Difference Operator (masks stats::D) # use xt instead of by ? # setGeneric("D") D <- function(x, n = 1, diff = 1, ...) UseMethod("D") # , x D.expression <- function(x, ...) if(missing(x)) stats::D(...) else stats::D(x, ...) D.call <- function(x, ...) if(missing(x)) stats::D(...) else stats::D(x, ...) D.name <- function(x, ...) if(missing(x)) stats::D(...) else stats::D(x, ...) D.default <- function(x, n = 1, diff = 1, g = NULL, t = NULL, fill = NA, rho = 1, stubs = TRUE, ...) { if(is.matrix(x) && !inherits(x, "matrix")) return(fdiff.matrix(x, n, diff, g, t, fill, FALSE, rho, stubs, ...)) fdiff.default(x, n, diff, g, t, fill, FALSE, rho, stubs, ...) } D.pseries <- function(x, n = 1, diff = 1, fill = NA, rho = 1, stubs = TRUE, ...) fdiff.pseries(x, n, diff, fill, FALSE, rho, stubs, ...) # setOldClass("pseries") # setMethod("D", signature(expr = "pseries"), D.pseries) D.matrix <- function(x, n = 1, diff = 1, g = NULL, t = NULL, fill = NA, rho = 1, stubs = TRUE, ...) fdiff.matrix(x, n, diff, g, t, fill, FALSE, rho, stubs, ...) # setMethod("D", "matrix") D.grouped_df <- function(x, n = 1, diff = 1, t = NULL, fill = NA, rho = 1, stubs = TRUE, keep.ids = TRUE, ...) { x <- x # because of piped calls -> "." is not in global environment ... eval(substitute(fdiff.grouped_df(x, n, diff, t, fill, FALSE, rho, stubs, keep.ids, ...))) } D.data.frame <- function(x, n = 1, diff = 1, by = NULL, t = NULL, cols = is.numeric, fill = NA, rho = 1, stubs = TRUE, keep.ids = TRUE, ...) DG_data_frame_template(x, n, diff, by, t, cols, fill, 1L, rho, stubs, keep.ids, ...) D.list <- D.data.frame D.pdata.frame <- function(x, n = 1, diff = 1, cols = is.numeric, fill = NA, rho = 1, stubs = TRUE, keep.ids = TRUE, ...) DG_pdata_frame_template(x, n, diff, cols, fill, 1L, rho, stubs, keep.ids, ...) # Log-Difference Operator Dlog <- function(x, n = 1, diff = 1, ...) UseMethod("Dlog") # , x Dlog.default <- function(x, n = 1, diff = 1, g = NULL, t = NULL, fill = NA, rho = 1, stubs = TRUE, ...) { if(is.matrix(x) && !inherits(x, "matrix")) return(fdiff.matrix(x, n, diff, g, t, fill, TRUE, rho, stubs, ...)) fdiff.default(x, n, diff, g, t, fill, TRUE, rho, stubs, ...) } Dlog.pseries <- function(x, n = 1, diff = 1, fill = NA, rho = 1, stubs = TRUE, ...) fdiff.pseries(x, n, diff, fill, TRUE, rho, stubs, ...) Dlog.matrix <- function(x, n = 1, diff = 1, g = NULL, t = NULL, fill = NA, rho = 1, stubs = TRUE, ...) fdiff.matrix(x, n, diff, g, t, fill, TRUE, rho, stubs, ...) Dlog.grouped_df <- function(x, n = 1, diff = 1, t = NULL, fill = NA, rho = 1, stubs = TRUE, keep.ids = TRUE, ...) { x <- x eval(substitute(fdiff.grouped_df(x, n, diff, t, fill, TRUE, rho, stubs, keep.ids, ...))) } Dlog.data.frame <- function(x, n = 1, diff = 1, by = NULL, t = NULL, cols = is.numeric, fill = NA, rho = 1, stubs = TRUE, keep.ids = TRUE, ...) DG_data_frame_template(x, n, diff, by, t, cols, fill, 2L, rho, stubs, keep.ids, ...) Dlog.list <- Dlog.data.frame Dlog.pdata.frame <- function(x, n = 1, diff = 1, cols = is.numeric, fill = NA, rho = 1, stubs = TRUE, keep.ids = TRUE, ...) DG_pdata_frame_template(x, n, diff, cols, fill, 2L, rho, stubs, keep.ids, ...) # Growth Operator G <- function(x, n = 1, diff = 1, ...) UseMethod("G") # , x G.default <- function(x, n = 1, diff = 1, g = NULL, t = NULL, fill = NA, logdiff = FALSE, scale = 100, power = 1, stubs = TRUE, ...) { if(is.matrix(x) && !inherits(x, "matrix")) return(fgrowth.matrix(x, n, diff, g, t, fill, logdiff, scale, power, stubs, ...)) fgrowth.default(x, n, diff, g, t, fill, logdiff, scale, power, stubs, ...) } G.pseries <- function(x, n = 1, diff = 1, fill = NA, logdiff = FALSE, scale = 100, power = 1, stubs = TRUE, ...) fgrowth.pseries(x, n, diff, fill, logdiff, scale, power, stubs, ...) G.matrix <- function(x, n = 1, diff = 1, g = NULL, t = NULL, fill = NA, logdiff = FALSE, scale = 100, power = 1, stubs = TRUE, ...) fgrowth.matrix(x, n, diff, g, t, fill, logdiff, scale, power, stubs, ...) G.grouped_df <- function(x, n = 1, diff = 1, t = NULL, fill = NA, logdiff = FALSE, scale = 100, power = 1, stubs = TRUE, keep.ids = TRUE, ...) { x <- x eval(substitute(fgrowth.grouped_df(x, n, diff, t, fill, logdiff, scale, power, stubs, keep.ids, ...))) } G.data.frame <- function(x, n = 1, diff = 1, by = NULL, t = NULL, cols = is.numeric, fill = NA, logdiff = FALSE, scale = 100, power = 1, stubs = TRUE, keep.ids = TRUE, ...) DG_data_frame_template(x, n, diff, by, t, cols, fill, 4L-logdiff, scale, stubs, keep.ids, power, ...) G.list <- G.data.frame G.pdata.frame <- function(x, n = 1, diff = 1, cols = is.numeric, fill = NA, logdiff = FALSE, scale = 100, power = 1, stubs = TRUE, keep.ids = TRUE, ...) DG_pdata_frame_template(x, n, diff, cols, fill, 4L-logdiff, scale, stubs, keep.ids, power, ...) collapse/R/RcppExports.R0000644000176200001440000001744114176661433014713 0ustar liggesusers# Generated by using Rcpp::compileAttributes() -> do not edit by hand # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 TRACpp <- function(x, xAG, g = 0L, ret = 1L) { .Call(`_collapse_TRACpp`, x, xAG, g, ret) } TRAlCpp <- function(x, xAG, g = 0L, ret = 1L) { .Call(`_collapse_TRAlCpp`, x, xAG, g, ret) } TRAmCpp <- function(x, xAG, g = 0L, ret = 1L) { .Call(`_collapse_TRAmCpp`, x, xAG, g, ret) } fndistinctCpp <- function(x, ng = 0L, g = 0L, gs = NULL, narm = TRUE) { .Call(`_collapse_fndistinctCpp`, x, ng, g, gs, narm) } fndistinctlCpp <- function(x, ng = 0L, g = 0L, gs = NULL, narm = TRUE, drop = TRUE) { .Call(`_collapse_fndistinctlCpp`, x, ng, g, gs, narm, drop) } fndistinctmCpp <- function(x, ng = 0L, g = 0L, gs = NULL, narm = TRUE, drop = TRUE) { .Call(`_collapse_fndistinctmCpp`, x, ng, g, gs, narm, drop) } BWCpp <- function(x, ng = 0L, g = 0L, gs = NULL, w = NULL, narm = TRUE, theta = 1, set_mean = 0, B = FALSE, fill = FALSE) { .Call(`_collapse_BWCpp`, x, ng, g, gs, w, narm, theta, set_mean, B, fill) } BWmCpp <- function(x, ng = 0L, g = 0L, gs = NULL, w = NULL, narm = TRUE, theta = 1, set_mean = 0, B = FALSE, fill = FALSE) { .Call(`_collapse_BWmCpp`, x, ng, g, gs, w, narm, theta, set_mean, B, fill) } BWlCpp <- function(x, ng = 0L, g = 0L, gs = NULL, w = NULL, narm = TRUE, theta = 1, set_mean = 0, B = FALSE, fill = FALSE) { .Call(`_collapse_BWlCpp`, x, ng, g, gs, w, narm, theta, set_mean, B, fill) } fbstatsCpp <- function(x, ext = FALSE, ng = 0L, g = 0L, npg = 0L, pg = 0L, w = NULL, stable_algo = TRUE, array = TRUE, setn = TRUE, gn = NULL) { .Call(`_collapse_fbstatsCpp`, x, ext, ng, g, npg, pg, w, stable_algo, array, setn, gn) } fbstatsmCpp <- function(x, ext = FALSE, ng = 0L, g = 0L, npg = 0L, pg = 0L, w = NULL, stable_algo = TRUE, array = TRUE, gn = NULL) { .Call(`_collapse_fbstatsmCpp`, x, ext, ng, g, npg, pg, w, stable_algo, array, gn) } fbstatslCpp <- function(x, ext = FALSE, ng = 0L, g = 0L, npg = 0L, pg = 0L, w = NULL, stable_algo = TRUE, array = TRUE, gn = NULL) { .Call(`_collapse_fbstatslCpp`, x, ext, ng, g, npg, pg, w, stable_algo, array, gn) } fdiffgrowthCpp <- function(x, n = 1L, diff = 1L, fill = NA_real_, ng = 0L, g = 0L, gs = NULL, t = NULL, ret = 1L, rho = 1, names = TRUE, power = 1) { .Call(`_collapse_fdiffgrowthCpp`, x, n, diff, fill, ng, g, gs, t, ret, rho, names, power) } fdiffgrowthmCpp <- function(x, n = 1L, diff = 1L, fill = NA_real_, ng = 0L, g = 0L, gs = NULL, t = NULL, ret = 1L, rho = 1, names = TRUE, power = 1) { .Call(`_collapse_fdiffgrowthmCpp`, x, n, diff, fill, ng, g, gs, t, ret, rho, names, power) } fdiffgrowthlCpp <- function(x, n = 1L, diff = 1L, fill = NA_real_, ng = 0L, g = 0L, gs = NULL, t = NULL, ret = 1L, rho = 1, names = TRUE, power = 1) { .Call(`_collapse_fdiffgrowthlCpp`, x, n, diff, fill, ng, g, gs, t, ret, rho, names, power) } flagleadCpp <- function(x, n = 1L, fill = NULL, ng = 0L, g = 0L, t = NULL, names = TRUE) { .Call(`_collapse_flagleadCpp`, x, n, fill, ng, g, t, names) } flagleadmCpp <- function(x, n = 1L, fill = NULL, ng = 0L, g = 0L, t = NULL, names = TRUE) { .Call(`_collapse_flagleadmCpp`, x, n, fill, ng, g, t, names) } flagleadlCpp <- function(x, n = 1L, fill = NULL, ng = 0L, g = 0L, t = NULL, names = TRUE) { .Call(`_collapse_flagleadlCpp`, x, n, fill, ng, g, t, names) } fmeanCpp <- function(x, ng = 0L, g = 0L, gs = NULL, w = NULL, narm = TRUE) { .Call(`_collapse_fmeanCpp`, x, ng, g, gs, w, narm) } fmeanmCpp <- function(x, ng = 0L, g = 0L, gs = NULL, w = NULL, narm = TRUE, drop = TRUE) { .Call(`_collapse_fmeanmCpp`, x, ng, g, gs, w, narm, drop) } fmeanlCpp <- function(x, ng = 0L, g = 0L, gs = NULL, w = NULL, narm = TRUE, drop = TRUE) { .Call(`_collapse_fmeanlCpp`, x, ng, g, gs, w, narm, drop) } fmodeCpp <- function(x, ng = 0L, g = 0L, gs = NULL, w = NULL, narm = TRUE, ret = 0L) { .Call(`_collapse_fmodeCpp`, x, ng, g, gs, w, narm, ret) } fmodelCpp <- function(x, ng = 0L, g = 0L, gs = NULL, w = NULL, narm = TRUE, ret = 0L) { .Call(`_collapse_fmodelCpp`, x, ng, g, gs, w, narm, ret) } fmodemCpp <- function(x, ng = 0L, g = 0L, gs = NULL, w = NULL, narm = TRUE, drop = TRUE, ret = 0L) { .Call(`_collapse_fmodemCpp`, x, ng, g, gs, w, narm, drop, ret) } fnthCpp <- function(x, Q = 0.5, ng = 0L, g = 0L, gs = NULL, w = NULL, narm = TRUE, ret = 1L) { .Call(`_collapse_fnthCpp`, x, Q, ng, g, gs, w, narm, ret) } fnthmCpp <- function(x, Q = 0.5, ng = 0L, g = 0L, gs = NULL, w = NULL, narm = TRUE, drop = TRUE, ret = 1L) { .Call(`_collapse_fnthmCpp`, x, Q, ng, g, gs, w, narm, drop, ret) } fnthlCpp <- function(x, Q = 0.5, ng = 0L, g = 0L, gs = NULL, w = NULL, narm = TRUE, drop = TRUE, ret = 1L) { .Call(`_collapse_fnthlCpp`, x, Q, ng, g, gs, w, narm, drop, ret) } fprodCpp <- function(x, ng = 0L, g = 0L, w = NULL, narm = TRUE) { .Call(`_collapse_fprodCpp`, x, ng, g, w, narm) } fprodmCpp <- function(x, ng = 0L, g = 0L, w = NULL, narm = TRUE, drop = TRUE) { .Call(`_collapse_fprodmCpp`, x, ng, g, w, narm, drop) } fprodlCpp <- function(x, ng = 0L, g = 0L, w = NULL, narm = TRUE, drop = TRUE) { .Call(`_collapse_fprodlCpp`, x, ng, g, w, narm, drop) } fscaleCpp <- function(x, ng = 0L, g = 0L, w = NULL, narm = TRUE, set_mean = 0, set_sd = 1) { .Call(`_collapse_fscaleCpp`, x, ng, g, w, narm, set_mean, set_sd) } fscalemCpp <- function(x, ng = 0L, g = 0L, w = NULL, narm = TRUE, set_mean = 0, set_sd = 1) { .Call(`_collapse_fscalemCpp`, x, ng, g, w, narm, set_mean, set_sd) } fscalelCpp <- function(x, ng = 0L, g = 0L, w = NULL, narm = TRUE, set_mean = 0, set_sd = 1) { .Call(`_collapse_fscalelCpp`, x, ng, g, w, narm, set_mean, set_sd) } fvarsdCpp <- function(x, ng = 0L, g = 0L, gs = NULL, w = NULL, narm = TRUE, stable_algo = TRUE, sd = TRUE) { .Call(`_collapse_fvarsdCpp`, x, ng, g, gs, w, narm, stable_algo, sd) } fvarsdmCpp <- function(x, ng = 0L, g = 0L, gs = NULL, w = NULL, narm = TRUE, stable_algo = TRUE, sd = TRUE, drop = TRUE) { .Call(`_collapse_fvarsdmCpp`, x, ng, g, gs, w, narm, stable_algo, sd, drop) } fvarsdlCpp <- function(x, ng = 0L, g = 0L, gs = NULL, w = NULL, narm = TRUE, stable_algo = TRUE, sd = TRUE, drop = TRUE) { .Call(`_collapse_fvarsdlCpp`, x, ng, g, gs, w, narm, stable_algo, sd, drop) } mrtl <- function(X, names = FALSE, ret = 0L) { .Call(`_collapse_mrtl`, X, names, ret) } mctl <- function(X, names = FALSE, ret = 0L) { .Call(`_collapse_mctl`, X, names, ret) } psmatCpp <- function(x, g, t = NULL, transpose = FALSE) { .Call(`_collapse_psmatCpp`, x, g, t, transpose) } pwnobsmCpp <- function(x) { .Call(`_collapse_pwnobsmCpp`, x) } qFCpp <- function(x, ordered = TRUE, na_exclude = TRUE, keep_attr = TRUE, ret = 1L) { .Call(`_collapse_qFCpp`, x, ordered, na_exclude, keep_attr, ret) } funiqueCpp <- function(x, sort = TRUE) { .Call(`_collapse_funiqueCpp`, x, sort) } fdroplevelsCpp <- function(x, check_NA = TRUE) { .Call(`_collapse_fdroplevelsCpp`, x, check_NA) } seqid <- function(x, o = NULL, del = 1L, start = 1L, na_skip = FALSE, skip_seq = FALSE, check_o = TRUE) { .Call(`_collapse_seqid`, x, o, del, start, na_skip, skip_seq, check_o) } groupid <- function(x, o = NULL, start = 1L, na_skip = FALSE, check_o = TRUE) { .Call(`_collapse_groupid`, x, o, start, na_skip, check_o) } varyingCpp <- function(x, ng = 0L, g = 0L, any_group = TRUE) { .Call(`_collapse_varyingCpp`, x, ng, g, any_group) } varyingmCpp <- function(x, ng = 0L, g = 0L, any_group = TRUE, drop = TRUE) { .Call(`_collapse_varyingmCpp`, x, ng, g, any_group, drop) } varyinglCpp <- function(x, ng = 0L, g = 0L, any_group = TRUE, drop = TRUE) { .Call(`_collapse_varyinglCpp`, x, ng, g, any_group, drop) } collapse/R/select_replace_add_vars.R0000644000176200001440000002764514201327077017236 0ustar liggesusers # ind must be integer (not numeric) !!! get_vars_ind <- function(x, ind, return = "data") switch(return, data = .Call(C_subsetCols, x, ind, TRUE), names = attr(x, "names")[ind], indices = ind, named_indices = `names<-`(ind, attr(x, "names")[ind]), logical = `[<-`(logical(length(unclass(x))), ind, value = TRUE), named_logical = `names<-`(`[<-`(logical(length(unclass(x))), ind, value = TRUE), attr(x, "names")), stop("Unknown return option!")) # ind must be logical !!! (this used to be get_vars_FUN) get_vars_indl <- function(x, indl, return = "data") switch(return, data = .Call(C_subsetCols, x, which(indl), TRUE), names = attr(x, "names")[indl], indices = which(indl), named_indices = which(`names<-`(indl, attr(x, "names"))), logical = indl, named_logical = `names<-`(indl, attr(x, "names")), stop("Unknown return option!")) # ind can be integer or logical "get_vars_ind<-" <- function(x, ind, value) { ind <- if(is.logical(ind)) which(ind) else as.integer(ind) if(is.null(value)) { if(!length(ind)) return(condalc(x, inherits(x, "data.table"))) return(.Call(C_subsetCols, x, -ind, TRUE)) } clx <- oldClass(x) oldClass(x) <- NULL if(is.list(value)) { oldClass(value) <- NULL # fastest ?? if(is.object(value)) oldClass(value) <- NULL ?? if(length(value[[1L]]) != length(x[[1L]])) stop("NROW(value) must match nrow(x)") if(length(value) != length(ind)) stop("NCOL(value) must match selected variables") # length(num_vars(x)) x[ind] <- value if(length(nam <- names(value))) names(x)[ind] <- nam # == length(ind) } else { if(NROW(unclass(value)) != length(x[[1L]])) stop("NROW(value) must match nrow(x)") if(length(ind) != 1L) stop("NCOL(value) must match selected variables") # length(num_vars(x)) x[[ind]] <- value } return(condalc(`oldClass<-`(x, clx), any(clx == "data.table"))) } fselect <- function(.x, ..., return = "data") { # This also takes names and indices .... # ax <- attributes(.x) # oldClass(.x) <- NULL # attributes ? nam <- attr(.x, "names") # if(inherits(.x, "data.table")) nam <- nam[seq_col(.x)] # required because of overallocation... -> Should be solved now, always take shallow copy... nl <- `names<-`(as.vector(seq_along(nam), "list"), nam) vars <- eval(substitute(c(...)), nl, parent.frame()) # if(!is.integer(vars)) stop(paste0("Unknown columns: ", .c(...))) # if(!is.integer(vars) || bmax(vars) > length(nam)) # nah, a bit redundant.. if(!is.atomic(vars) || is.logical(vars)) stop("... needs to be expressions evaluating to integer or character") nam_vars <- names(vars) vars <- if(is.character(vars)) ckmatch(vars, nam) else as.integer(vars) # needed, otherwise selecting with doubles gives an error if(length(nam_vars)) { # Allow renaming during selection nonmiss <- nzchar(nam_vars) nam[vars[nonmiss]] <- nam_vars[nonmiss] } # if(!is.numeric(vars)) stop("... needs to be column names, or character / integer / logical vectors") switch(return, # need this for sf data.frame data = .Call(C_subsetCols, if(length(nam_vars)) `attr<-`(.x, "names", nam) else .x, vars, TRUE), # setAttributes(.x[vars], `[[<-`(ax, "names", nam[vars])), # Also Improvements in code below ? names = nam[vars], indices = vars, named_indices = `names<-`(vars, nam[vars]), logical = `[<-`(logical(length(nam)), vars, TRUE), named_logical = `names<-`(`[<-`(logical(length(nam)), vars, TRUE), nam), stop("Unknown return option")) } # or slt sel, selt, sct -> shortcut ? slt <- fselect # good, consistent # fselect(GGDC10S, Country, AGR:SUM) # fselect(GGDC10S, Variable == "VA" & Year > 1990, Country, Year, AGR:SUM) -> why no error ?? first argument is just ignored ... ?? "fselect<-" <- function(x, ..., value) { nam <- attr(x, "names") # if(inherits(x, "data.table")) nam <- nam[seq_col(x)] # required because of overallocation... Should be solved now -> always make shallow copy nl <- `names<-`(as.vector(seq_along(nam), "list"), nam) vars <- eval(substitute(c(...)), nl, parent.frame()) if(!is.atomic(vars) || is.logical(vars)) stop("... needs to be expressions evaluating to integer or character") if(is.character(vars)) vars <- ckmatch(vars, nam) if(vars[1L] < 0L) vars <- seq_along(nam)[vars] # if(!is.numeric(vars)) stop("... needs to be column names, or character / integer / logical vectors") # if(!is.integer(vars)) stop(paste0("Unknown columns: ", .c(...))) `get_vars_ind<-`(x, vars, value) } "slt<-" <- `fselect<-` # STD(fselect(GGDC10S, Country, Variable, Year, AGR:SUM)) # Idea: also do this for replacement functions, replacing characters renames, replacong number reorders, replacing 3 does renaming and reordering? num_vars <- function(x, return = "data") get_vars_indl(x, .Call(C_vtypes, x, 1L), return) # vapply(`attributes<-`(x, NULL), is.numeric, TRUE) nv <- num_vars "num_vars<-" <- function(x, value) `get_vars_ind<-`(x, .Call(C_vtypes, x, 1L), value) "nv<-" <- `num_vars<-` char_vars <- function(x, return = "data") get_vars_ind(x, .Call(C_vtypes, x, 0L) %==% 17L, return) # vapply(`attributes<-`(x, NULL), is.character, TRUE) "char_vars<-" <- function(x, value) `get_vars_ind<-`(x, .Call(C_vtypes, x, 0L) %==% 17L, value) fact_vars <- function(x, return = "data") get_vars_indl(x, .Call(C_vtypes, x, 2L), return) # vapply(`attributes<-`(x, NULL), is.factor, TRUE) "fact_vars<-" <- function(x, value) `get_vars_ind<-`(x, .Call(C_vtypes, x, 2L), value) logi_vars <- function(x, return = "data") get_vars_ind(x, .Call(C_vtypes, x, 0L) %==% 11L, return) # vapply(`attributes<-`(x, NULL), is.logical, TRUE) "logi_vars<-" <- function(x, value) `get_vars_ind<-`(x, .Call(C_vtypes, x, 0L) %==% 11L, value) date_vars <- function(x, return = "data") get_vars_indl(x, vapply(`attributes<-`(x, NULL), is_date, TRUE), return) "date_vars<-" <- function(x, value) `get_vars_ind<-`(x, vapply(`attributes<-`(x, NULL), is_date, TRUE), value) Date_vars <- function(x, return = "data") { message("Note that 'Date_vars' was renamed to 'date_vars'. It will not be removed anytime soon, but please use updated function names in new code, see help('collapse-renamed')") date_vars(x, return) } "Date_vars<-" <- function(x, value) { message("Note that 'Date_vars' was renamed to 'date_vars'. It will not be removed anytime soon, but please use updated function names in new code, see help('collapse-renamed')") `date_vars<-`(x, value) } cat_vars <- function(x, return = "data") get_vars_ind(x, .Call(C_vtypes, x, 1L) %!=% TRUE, return) "cat_vars<-" <- function(x, value) `get_vars_ind<-`(x, .Call(C_vtypes, x, 1L) %!=% TRUE, value) get_vars <- function(x, vars, return = "data", regex = FALSE, ...) { if(regex) { if(!is.character(vars)) stop("If regex = TRUE, vars must be character") ind <- rgrep(vars, attr(x, "names"), ...) } else { if(!missing(...)) unused_arg_action(match.call(), ...) ind <- cols2int(vars, x, attr(x, "names")) } get_vars_ind(x, ind, return) } gv <- function(x, vars, return = "data", ...) { if(!missing(...)) { warning("Please use the new shortcut 'gvr' for regex column selection.") return(get_vars(x, vars, return, ...)) } ind <- cols2int(vars, x, attr(x, "names")) get_vars_ind(x, ind, return) } gvr <- function(x, vars, return = "data", ...) { if(!is.character(vars)) stop("If regex = TRUE, vars must be character") ind <- rgrep(vars, attr(x, "names"), ...) get_vars_ind(x, ind, return) } "get_vars<-" <- function(x, vars, regex = FALSE, ..., value) { if(regex) { if(!is.character(vars)) stop("If regex = TRUE, vars must be character") ind <- rgrep(vars, attr(x, "names"), ...) } else { if(!missing(...)) unused_arg_action(match.call(), ...) ind <- cols2int(vars, x, attr(x, "names")) } `get_vars_ind<-`(x, ind, value) } "gv<-" <- function(x, vars, ..., value) { if(!missing(...)) { warning("Please use the new shortcut 'gvr<-' for regex column replacement.") return(`get_vars<-`(x, vars, ..., value = value)) } ind <- cols2int(vars, x, attr(x, "names")) `get_vars_ind<-`(x, ind, value) } "gvr<-" <- function(x, vars, ..., value) { ind <- rgrep(vars, attr(x, "names"), ...) `get_vars_ind<-`(x, ind, value) } # Make faster ? "add_vars<-" <- function(x, pos = "end", value) { ax <- attributes(x) attributes(x) <- NULL lx <- length(x) if(is.list(value)) { oldClass(value) <- NULL # fastest ? if(length(value[[1L]]) != length(x[[1L]])) stop("NROW(value) must match nrow(x)") # res <- c(x, value) # FASTER than commented out below if(is.character(pos)) { if(pos == "end") { ax[["names"]] <- if(length(nam <- names(value))) c(ax[["names"]], nam) else c(ax[["names"]], paste0("V", seq(lx+1L, lx+length(value)))) return(condalcSA(c(x, value), ax, any(ax[["class"]] == "data.table"))) } else if(pos != "front") stop("pos needs to be 'end', 'front' or a suitable numeric / integer vector of positions!") ax[["names"]] <- if(length(nam <- names(value))) c(nam, ax[["names"]]) else c(paste0("V", seq_along(value)), ax[["names"]]) return(condalcSA(c(value, x), ax, any(ax[["class"]] == "data.table"))) } lv <- length(value) tl <- lv+lx if(!is.numeric(pos) || length(pos) != lv || bmax(pos) > tl) stop("pos needs to be 'end', 'front' or a suitable numeric / integer vector of positions!") o <- forder.int(c(seq_len(tl)[-pos], pos)) ax[["names"]] <- if(length(nam <- names(value))) c(ax[["names"]], nam)[o] else c(ax[["names"]], paste0("V", pos))[o] # FASTER THIS WAY? -> It seems so... return(condalcSA(c(x, value)[o], ax, any(ax[["class"]] == "data.table"))) # fastest ?? use setcolorder ? (probably not ) # ind <- seq(lx+1L, lx+length(value)) # x[ind] <- value # FASTER than simply using x[names(value)] <- value ? -> Yes ! # ax[["names"]] <- if(length(nam <- names(value))) c(ax[["names"]], nam) else # c(ax[["names"]], paste0("V", ind)) } else { if(NROW(value) != length(x[[1L]])) stop("NROW(value) must match nrow(x)") # res <- c(x, list(value)) # FASTER than below ? -> Nope # ax[["names"]] <- c(ax[["names"]], paste0("V", lx+1L)) nam <- l1orlst(as.character(substitute(value))) if(is.character(pos)) { if(pos == "end") { x[[lx+1L]] <- value ax[["names"]] <- c(ax[["names"]], nam) # paste0("V", lx+1L) return(condalcSA(x, ax, any(ax[["class"]] == "data.table"))) } else if(pos != "front") stop("pos needs to be 'end', 'front' or a suitable numeric / integer vector of positions!") ax[["names"]] <- c(nam, ax[["names"]]) return(condalcSA(c(list(value), x), ax, any(ax[["class"]] == "data.table"))) } if(!is.numeric(pos) || length(pos) > 1L || pos > lx+1L) stop("pos needs to be 'end', 'front' or a suitable numeric / integer vector of positions!") o <- forder.int(c(1:lx, pos-1L)) ax[["names"]] <- c(ax[["names"]], nam)[o] return(condalcSA(c(x, list(value))[o], ax, any(ax[["class"]] == "data.table"))) } } "av<-" <- `add_vars<-` add_vars <- function(x, ..., pos = "end") { if(...length() == 1L) return(`add_vars<-`(x, pos, ...)) l <- c(...) if(!all(fnrow2(x) == vlengths(l, FALSE))) stop("if multiple arguments are passed to '...', each needs to be a data.frame/list with column-lengths matching nrow(x)") return(`add_vars<-`(x, pos, l)) # very minimal ! Doesn't work for vectors etc ! } av <- add_vars # Exercises: # repl <- function(x)x # `repl<-` <- function(x, value) { # x <- value # x # } # repl(x)[2] <- 4 # Works!! # http://adv-r.had.co.nz/Functions.html#special-calls # This works because the expression names(x)[2] <- "two" is evaluated as if you had written: #`*tmp*` <- names(x) #`*tmp*`[2] <- "two" #names(x) <- `*tmp*` collapse/R/qsu.R0000644000176200001440000003171714174223734013230 0ustar liggesusers qsu <- function(x, ...) UseMethod("qsu") # , x qsu.default <- function(x, g = NULL, pid = NULL, w = NULL, higher = FALSE, array = TRUE, stable.algo = TRUE, ...) { if(is.matrix(x) && !inherits(x, "matrix")) return(qsu.matrix(x, g, pid, w, higher, array, stable.algo, ...)) if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(g)) { if(is.null(pid)) return(fbstatsCpp(x,higher, w = w, stable.algo = stable.algo)) pid <- G_guo(pid) return(fbstatsCpp(x,higher,0L,0L,pid[[1L]],pid[[2L]],w,stable.algo)) } if(is.atomic(g)) { if(!is.nmfactor(g)) g <- qF(g, na.exclude = FALSE) lev <- attr(g, "levels") if(is.null(pid)) return(fbstatsCpp(x,higher,length(lev),g,0L,0L,w,stable.algo,TRUE,TRUE,lev)) pid <- G_guo(pid) return(fbstatsCpp(x,higher,length(lev),g,pid[[1L]],pid[[2L]],w,stable.algo,array,TRUE,lev)) } if(!is_GRP(g)) g <- GRP.default(g, call = FALSE) if(is.null(pid)) return(fbstatsCpp(x,higher,g[[1L]],g[[2L]],0L,0L,w,stable.algo,TRUE,TRUE,GRPnames(g))) pid <- G_guo(pid) fbstatsCpp(x,higher,g[[1L]],g[[2L]],pid[[1L]],pid[[2L]],w,stable.algo,array,TRUE,GRPnames(g)) } qsu.pseries <- function(x, g = NULL, w = NULL, effect = 1L, higher = FALSE, array = TRUE, stable.algo = TRUE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) pid <- if(length(effect) == 1L) .subset2(getpix(attr(x, "index")), effect) else finteraction(.subset(getpix(attr(x, "index")), effect)) if(is.null(g)) return(fbstatsCpp(x,higher,0L,0L,fnlevels(pid),pid,w,stable.algo)) if(is.atomic(g)) { if(!is.nmfactor(g)) g <- qF(g, na.exclude = FALSE) lev <- attr(g, "levels") return(fbstatsCpp(x,higher,length(lev),g,fnlevels(pid),pid,w,stable.algo,array,TRUE,lev)) } if(!is_GRP(g)) g <- GRP.default(g, call = FALSE) fbstatsCpp(x,higher,g[[1L]],g[[2L]],fnlevels(pid),pid,w,stable.algo,array,TRUE,GRPnames(g)) } qsu.matrix <- function(x, g = NULL, pid = NULL, w = NULL, higher = FALSE, array = TRUE, stable.algo = TRUE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(g)) { if(is.null(pid)) return(fbstatsmCpp(x,higher, w = w, stable.algo = stable.algo)) pid <- G_guo(pid) return(fbstatsmCpp(x,higher,0L,0L,pid[[1L]],pid[[2L]],w,stable.algo,array)) } if(is.atomic(g)) { if(!is.nmfactor(g)) g <- qF(g, na.exclude = FALSE) lev <- attr(g, "levels") if(is.null(pid)) return(fbstatsmCpp(x,higher,length(lev),g,0L,0L,w,stable.algo,array,lev)) pid <- G_guo(pid) return(fbstatsmCpp(x,higher,length(lev),g,pid[[1L]],pid[[2L]],w,stable.algo,array,lev)) } if(!is_GRP(g)) g <- GRP.default(g, call = FALSE) if(is.null(pid)) return(fbstatsmCpp(x,higher,g[[1L]],g[[2L]],0L,0L,w,stable.algo,array,GRPnames(g))) pid <- G_guo(pid) fbstatsmCpp(x,higher,g[[1L]],g[[2L]],pid[[1L]],pid[[2L]],w,stable.algo,array,GRPnames(g)) } qsu.data.frame <- function(x, by = NULL, pid = NULL, w = NULL, cols = NULL, higher = FALSE, array = TRUE, vlabels = FALSE, stable.algo = TRUE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) formby <- is.call(by) formpid <- is.call(pid) formw <- is.call(w) # fastest solution!! (see checks below !!) if(formby || formpid || formw) { v <- NULL class(x) <- NULL nam <- names(x) if(formby) { if(length(by) == 3L) { v <- ckmatch(all.vars(by[[2L]]), nam) byn <- ckmatch(all.vars(by[[3L]]), nam) } else byn <- ckmatch(all.vars(by), nam) by <- if(length(byn) == 1L) x[[byn]] else GRP.default(x[byn], call = FALSE) } else byn <- NULL if(formpid) { if(length(pid) == 3L) { v <- ckmatch(all.vars(pid[[2L]]), nam) pidn <- ckmatch(all.vars(pid[[3L]]), nam) } else pidn <- ckmatch(all.vars(pid), nam) pid <- if(length(pidn) == 1L) x[[pidn]] else GRP.default(x[pidn], return.groups = FALSE, call = FALSE) } else pidn <- NULL if(formw) { widn <- ckmatch(all.vars(w), nam) w <- x[[widn]] } else widn <- NULL if(is.null(v)) { x <- if(is.null(cols)) x[-c(byn, pidn, widn)] else x[cols2int(cols, x, nam, FALSE)] } else x <- x[v] } else if(length(cols)) x <- .subset(x, cols2int(cols, x, attr(x, "names"), FALSE)) # Get vlabels if(vlabels) attr(x, "names") <- paste(attr(x, "names"), vlabels(x, use.names = FALSE), sep = ": ") # original code: if(is.null(by)) { if(is.null(pid)) return(fbstatslCpp(x,higher, w = w, stable.algo = stable.algo)) pid <- G_guo(pid) return(drop(fbstatslCpp(x,higher,0L,0L,pid[[1L]],pid[[2L]],w,stable.algo,array))) } if(is.atomic(by)) { if(!is.nmfactor(by)) by <- qF(by, na.exclude = FALSE) lev <- attr(by, "levels") if(is.null(pid)) return(drop(fbstatslCpp(x,higher,length(lev),by,0L,0L,w,stable.algo,array,lev))) pid <- G_guo(pid) return(drop(fbstatslCpp(x,higher,length(lev),by,pid[[1L]],pid[[2L]],w,stable.algo,array,lev))) } if(!is_GRP(by)) by <- GRP.default(by, call = FALSE) if(is.null(pid)) return(drop(fbstatslCpp(x,higher,by[[1L]],by[[2L]],0L,0L,w,stable.algo,array,GRPnames(by)))) pid <- G_guo(pid) drop(fbstatslCpp(x,higher,by[[1L]],by[[2L]],pid[[1L]],pid[[2L]],w,stable.algo,array,GRPnames(by))) } qsu.list <- function(x, by = NULL, pid = NULL, w = NULL, cols = NULL, higher = FALSE, array = TRUE, vlabels = FALSE, stable.algo = TRUE, ...) qsu.data.frame(x, by, pid, w, cols, higher, array, vlabels, stable.algo, ...) qsu.sf <- function(x, by = NULL, pid = NULL, w = NULL, cols = NULL, higher = FALSE, array = TRUE, vlabels = FALSE, stable.algo = TRUE, ...) { oldClass(x) <- NULL x[[attr(x, "sf_column")]] <- NULL qsu.data.frame(x, by, pid, w, cols, higher, array, vlabels, stable.algo, ...) } qsu.pdata.frame <- function(x, by = NULL, w = NULL, cols = NULL, effect = 1L, higher = FALSE, array = TRUE, vlabels = FALSE, stable.algo = TRUE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) pid <- if(length(effect) == 1L) .subset2(getpix(attr(x, "index")), effect) else finteraction(.subset(getpix(attr(x, "index")), effect)) formby <- is.call(by) formw <- is.call(w) # fastest solution if(formby || formw) { v <- NULL class(x) <- NULL nam <- names(x) if(formby) { if(length(by) == 3L) { v <- ckmatch(all.vars(by[[2L]]), nam) byn <- ckmatch(all.vars(by[[3L]]), nam) } else byn <- ckmatch(all.vars(by), nam) by <- if(length(byn) == 1L) x[[byn]] else GRP.default(x[byn]) } else byn <- NULL if(formw) { widn <- ckmatch(all.vars(w), nam) w <- x[[widn]] } else widn <- NULL if(is.null(v)) { x <- if(is.null(cols)) x[-c(byn, widn)] else x[cols2int(cols, x, nam, FALSE)] } else x <- x[v] } else if(length(cols)) x <- .subset(x, cols2int(cols, x, attr(x, "names"), FALSE)) if(vlabels) attr(x, "names") <- paste(attr(x, "names"), vlabels(x, use.names = FALSE), sep = ": ") if(is.null(by)) return(drop(fbstatslCpp(x,higher,0L,0L,fnlevels(pid),pid,w,stable.algo,array))) if(is.atomic(by)) { if(!is.nmfactor(by)) by <- qF(by, na.exclude = FALSE) lev <- attr(by, "levels") return(drop(fbstatslCpp(x,higher,length(lev),by,fnlevels(pid),pid,w,stable.algo,array,lev))) } if(!is_GRP(by)) by <- GRP.default(by, call = FALSE) drop(fbstatslCpp(x,higher,by[[1L]],by[[2L]],fnlevels(pid),pid,w,stable.algo,array,GRPnames(by))) } # Try to speed up ! Printing Takes 100 milliseconds on WDI ! print.qsu <- function(x, digits = 4, nonsci.digits = 9, na.print = "-", return = FALSE, print.gap = 2, ...) { vec2mat <- function(x) if(is.array(x)) x else # outer(1, x) # for variable spacing in vector printing... `attributes<-`(x, list(dim = c(1L, length(x)), dimnames = list("", names(x)))) # faster and better !! formatfun <- function(x) { # , drop0trailing = FALSE redundat ?? class(x) <- NULL xx <- formatC(vec2mat(round(x, digits)), format = "g", flag = "#", digits = nonsci.digits, big.mark = "'", big.interval = 6, # "\u2009": https://stackoverflow.com/questions/30555232/using-a-half-space-as-a-big-mark-for-knitr-output drop0trailing = TRUE, preserve.width = "individual") # format(unclass(round(x,2)), digits = digits, drop0trailing = TRUE, big.mark = ",", big.interval = 6, scientific = FALSE) if(any(ina <- is.na(x))) xx[ina] <- na.print xx <- gsub(" ", "", xx, fixed = TRUE) # remove some weird white space (qsu(GGDS10S)) return(xx) } xx <- if(is.atomic(x)) formatfun(x) else rapply(x, formatfun, how = "list") # No longer necessary, but keep, maybe you want to print lists using print.qsu. if(return) return(xx) else print.default(xx, quote = FALSE, right = TRUE, print.gap = print.gap, ...) invisible(x) } # View.qsu <- function(x) View(unclass(x)) aperm.qsu <- function(a, perm = NULL, resize = TRUE, keep.class = TRUE, ...) { r <- aperm.default(a, perm, resize = resize) if(keep.class) oldClass(r) <- oldClass(a) r } `[.qsu` <- function(x, i, j, ..., drop = TRUE) `oldClass<-`(NextMethod(), oldClass(x)) # testing formula inputs: # best!!! (subsetting unclassed objects is better), and reassigning x does not take more memry than deleting columns, it is often faster !!, also GRP does not mind unclassed objects !! # formtest2 <- function(x, by = NULL, xt = NULL, cols = NULL) { # formby <- is.call(by) # formxt <- is.call(xt) # # # fastest solution: (check: is reassigning x memory efficient ?? should not rather delete columns ??) # if(formby || formxt) { # v <- NULL # class(x) <- NULL # this is faster !! # if(formby) { # if(length(by) == 3L) { # v <- all.vars(by[[2L]]) # namby <- all.vars(by[[3L]]) # } else namby <- all.vars(by) # by <- if(length(namby) == 1L) x[[namby]] else GRP(x, namby) # } else namby <- NULL # if(formxt) { # if(length(xt) == 3L) { # v <- all.vars(xt[[2L]]) # namxt <- all.vars(xt[[3L]]) # } else namxt <- all.vars(xt) # xt <- if(length(namxt) == 1L) x[[namxt]] else GRP(x, namxt) # } else namxt <- NULL # if(is.null(v)) { # reassign ?? or set NULL ??? what is more memory efficient ?? # x <- if(is.null(cols)) x[-match(c(namby,namxt), names(x))] else if(is.function(cols)) # x[vapply(x, cols, TRUE)] else x[cols] # } else x <- x[v] # } else if(length(cols)) { # # class(x) <- NULL # but unclass is faster !! # x <- if(is.function(cols)) unclass(x)[vapply(x, cols, TRUE)] else .subset(x, cols) # } # return(list(x, by, xt)) # } # # formtest1 <- function(x, by = NULL, xt = NULL, cols = NULL) { # formby <- is.call(by) # formxt <- is.call(xt) # # # fastest solution: (check: is reassigning x memory efficient ?? should not rather delete columns ??) # if(formby || formxt) { # v <- NULL # if(formby) { # if(length(by) == 3L) { # v <- all.vars(by[[2L]]) # namby <- all.vars(by[[3L]]) # } else namby <- all.vars(by) # by <- if(length(namby) == 1L) x[[namby]] else GRP(x, namby) # } else namby <- NULL # if(formxt) { # if(length(xt) == 3L) { # v <- all.vars(xt[[2L]]) # namxt <- all.vars(xt[[3L]]) # } else namxt <- all.vars(xt) # xt <- if(length(namxt) == 1L) x[[namxt]] else GRP(x, namxt) # } else namxt <- NULL # if(is.null(v)) { # reassign ?? or set NULL ??? what is more memory efficient ?? # x <- if(is.null(cols)) unclass(x)[-match(c(namby,namxt), names(x))] else if(is.function(cols)) # unclass(x)[vapply(x, cols, TRUE)] else .subset(x, cols) # } else x <- unclass(x)[v] # } else if(length(cols)) { # x <- if(is.function(cols)) unclass(x)[vapply(x, cols, TRUE)] else .subset(x, cols) # } # return(list(x, by, xt)) # } # # formtest3 <- function(x, by = NULL, xt = NULL, cols = NULL) { # formby <- is.call(by) # formxt <- is.call(xt) # # # fastest solution: (check: is reassigning x memory efficient ?? should not rather delete columns ??) # if(formby || formxt) { # v <- NULL # class(x) <- NULL # if(formby) { # if(length(by) == 3L) { # v <- all.vars(by[[2L]]) # namby <- all.vars(by[[3L]]) # } else namby <- all.vars(by) # by <- if(length(namby) == 1L) x[[namby]] else GRP(x, namby) # } else namby <- NULL # if(formxt) { # if(length(xt) == 3L) { # v <- all.vars(xt[[2L]]) # namxt <- all.vars(xt[[3L]]) # } else namxt <- all.vars(xt) # xt <- if(length(namxt) == 1L) x[[namxt]] else GRP(x, namxt) # } else namxt <- NULL # if(is.null(v)) { # reassign ?? or set NULL ??? what is more memory efficient ?? # if(is.null(cols)) x[match(c(namby,namxt), names(x))] <- NULL else if(is.function(cols)) # x[!vapply(x, cols, TRUE)] <- NULL else x[-cols] <- NULL # } else x[-v] <- NULL # } else if(length(cols)) { # if(is.function(cols)) x[!vapply(x, cols, TRUE)] <- NULL else x[-cols] <- NULL # } # return(list(x, by, xt)) # } # collapse/R/fsubset_ftransform.R0000644000176200001440000010334614201327077016326 0ustar liggesusers fsubset <- function(.x, ...) UseMethod("fsubset") sbt <- fsubset # Also not really faster than default for numeric (but a bit faster for factors ...) fsubset.default <- function(.x, subset, ...) { if(is.matrix(.x) && !inherits(.x, "matrix")) return(fsubset.matrix(.x, subset, ...)) if(!missing(...)) unused_arg_action(match.call(), ...) if(is.logical(subset)) return(.Call(C_subsetVector, .x, which(subset), FALSE)) .Call(C_subsetVector, .x, subset, TRUE) } fsubset.matrix <- function(.x, subset, ..., drop = FALSE) { if(missing(...)) return(.x[subset, , drop = drop]) # better row subsetting ? (like df, method? use mctl ?) nl <- `names<-`(as.vector(1L:ncol(.x), "list"), dimnames(.x)[[2L]]) vars <- eval(substitute(c(...)), nl, parent.frame()) if(missing(subset)) return(.x[, vars, drop = drop]) .x[subset, vars, drop = drop] } # No lazy eval ss <- function(x, i, j) { if(is.atomic(x)) if(is.matrix(x)) return(if(missing(j)) x[i, , drop = FALSE] else if(missing(i)) x[, j, drop = FALSE] else x[i, j, drop = FALSE]) else return(x[i]) mj <- missing(j) if(mj) j <- seq_along(unclass(x)) else if(is.integer(j)) { # if(missing(i)) stop("Need to supply either i or j or both") if(missing(i)) return(.Call(C_subsetCols, x, j, TRUE)) if(any(j < 0L)) j <- seq_along(unclass(x))[j] } else { if(is.character(j)) { j <- ckmatch(j, attr(x, "names")) } else if(is.logical(j)) { if(length(j) != length(unclass(x))) stop("If j is logical, it needs to be of length ncol(x)") j <- which(j) } else if(is.numeric(j)) { j <- if(any(j < 0)) seq_along(unclass(x))[j] else as.integer(j) } else stop("j needs to be supplied integer indices, character column names, or a suitable logical vector") if(missing(i)) return(.Call(C_subsetCols, x, j, TRUE)) } checkrows <- TRUE if(!is.integer(i)) { if(is.numeric(i)) i <- as.integer(i) else if(is.logical(i)) { nr <- fnrow2(x) if(length(i) != nr) stop("i needs to be integer or logical(nrow(x))") # which(r & !is.na(r)) not needed ! i <- which(i) if(length(i) == nr) if(mj) return(x) else return(.Call(C_subsetCols, x, j, TRUE)) checkrows <- FALSE } else stop("i needs to be integer or logical(nrow(x))") } rn <- attr(x, "row.names") if(is.numeric(rn) || is.null(rn) || rn[1L] == "1") return(.Call(C_subsetDT, x, i, j, checkrows)) return(`attr<-`(.Call(C_subsetDT, x, i, j, checkrows), "row.names", rn[i])) } fsubset.data.frame <- function(.x, subset, ...) { r <- eval(substitute(subset), .x, parent.frame()) # Needs to be placed above any column renaming if(missing(...)) vars <- seq_along(unclass(.x)) else { ix <- seq_along(unclass(.x)) nl <- `names<-`(as.vector(ix, "list"), attr(.x, "names")) vars <- eval(substitute(c(...)), nl, parent.frame()) nam_vars <- names(vars) if(is.integer(vars)) { if(any(vars < 0L)) vars <- ix[vars] } else { if(is.character(vars)) vars <- ckmatch(vars, names(nl)) else if(is.numeric(vars)) { vars <- if(any(vars < 0)) ix[vars] else as.integer(vars) } else stop("... needs to be comma separated column names, or column indices") } if(length(nam_vars)) { nonmiss <- nzchar(nam_vars) attr(.x, "names")[vars[nonmiss]] <- nam_vars[nonmiss] } } checkrows <- TRUE if(is.logical(r)) { nr <- fnrow2(.x) if(length(r) != nr) stop("subset needs to be an expression evaluating to logical(nrow(.x)) or integer") # which(r & !is.na(r)) not needed ! r <- which(r) if(length(r) == nr) if(missing(...)) return(.x) else return(.Call(C_subsetCols, .x, vars, TRUE)) checkrows <- FALSE } else if(is.numeric(r)) r <- as.integer(r) else stop("subset needs to be an expression evaluating to logical(nrow(.x)) or integer") rn <- attr(.x, "row.names") if(is.numeric(rn) || is.null(rn) || rn[1L] == "1") return(.Call(C_subsetDT, .x, r, vars, checkrows)) return(`attr<-`(.Call(C_subsetDT, .x, r, vars, checkrows), "row.names", rn[r])) } # Example: # fsubset(GGDC10S, Variable == "VA" & Year > 1990, Country, Year, AGR:SUM) ftransform_core <- function(X, value) { # value is unclassed, X has all attributes ax <- attributes(X) # keep like this ? oldClass(X) <- NULL nam <- names(value) if(!length(nam) || fanyDuplicated(nam)) stop("All replacement expressions have to be uniquely named") namX <- names(X) # !length also detects character(0) if(!length(namX) || fanyDuplicated(namX)) stop("All columns of .data have to be uniquely named") le <- vlengths(value, FALSE) nr <- length(X[[1L]]) rl <- le == nr # checking if computed values have the right length inx <- match(nam, namX) # calling names on a plain list is really fast -> no need to save objects.. matched <- !is.na(inx) if(all(rl)) { # All computed vectors have the right length if(any(matched)) X[inx[matched]] <- value[matched] } else { # Some do not if(any(1L < le & !rl)) stop("Lengths of replacements must be equal to nrow(.data) or 1, or NULL to delete columns") if(any(le1 <- le == 1L)) value[le1] <- lapply(value[le1], alloc, nr) # Length 1 arguments. can use TRA ?, or rep_len, but what about date variables ? if(any(le0 <- le == 0L)) { # best order -> yes, ftransform(mtcars, bla = NULL) just returns mtcars, but could also put this error message: if(any(le0 & !matched)) stop(paste("Can only delete existing columns, unknown columns:", paste(nam[le0 & !matched], collapse = ", "))) if(all(le0)) { X[inx[le0]] <- NULL return(`oldClass<-`(X, ax[["class"]])) } matched <- matched[!le0] value <- value[!le0] # value[le0] <- NULL if(any(matched)) X[inx[!le0][matched]] <- value[matched] # index is wrong after first deleting, thus we delete after ! X[inx[le0]] <- NULL } else if(any(matched)) X[inx[matched]] <- value[matched] # NULL assignment ... -> Nope ! } if(all(matched)) return(`oldClass<-`(X, ax[["class"]])) ax[["names"]] <- c(names(X), names(value)[!matched]) setAttributes(c(X, value[!matched]), ax) } ftransform <- function(.data, ...) { # `_data` ? if(!is.list(.data)) stop(".data needs to be a list of equal length columns or a data.frame") e <- eval(substitute(list(...)), .data, parent.frame()) if(is.null(names(e)) && length(e) == 1L && is.list(e[[1L]])) e <- unclass(e[[1L]]) # support list input -> added in v1.3.0 return(condalc(ftransform_core(.data, e), inherits(.data, "data.table"))) } tfm <- ftransform `ftransform<-` <- function(.data, value) { if(!is.list(.data)) stop(".data needs to be a list of equal length columns or a data.frame") if(!is.list(value)) stop("value needs to be a named list") return(condalc(ftransform_core(.data, unclass(value)), inherits(.data, "data.table"))) } `tfm<-` <- `ftransform<-` # Example: # ftransform(mtcars, cyl = cyl + 10, vs2 = 1, mpg = NULL) eval_exp <- function(nam, exp, pe) { nl <- `names<-`(as.vector(seq_along(nam), "list"), nam) eval(exp, nl, pe) } ftransformv <- function(.data, vars, FUN, ..., apply = TRUE) { if(!is.list(.data)) stop(".data needs to be a list of equal length columns or a data.frame") if(!is.function(FUN)) stop("FUN needs to be a function") clx <- oldClass(.data) vs <- tryCatch(vars, error = function(e) NULL) if(apply) { oldClass(.data) <- NULL if(is.null(vs)) vs <- eval_exp(names(.data), substitute(vars), parent.frame()) vars <- cols2int(vs, .data, names(.data), FALSE) value <- `names<-`(.data[vars], NULL) value <- if(missing(...)) lapply(value, FUN) else eval(substitute(lapply(value, FUN, ...)), .data, parent.frame()) } else { nam <- attr(.data, "names") if(is.null(vs)) vs <- eval_exp(nam, substitute(vars), parent.frame()) vars <- cols2int(vs, .data, nam, FALSE) value <- .Call(C_subsetCols, .data, vars, FALSE) value <- if(missing(...)) unclass(FUN(value)) else # unclass needed here ? -> yes for lengths... unclass(eval(substitute(FUN(value, ...)), .data, parent.frame())) if(!identical(names(value), nam[vars])) return(condalc(ftransform_core(.data, value), any(clx == "data.table"))) oldClass(.data) <- NULL } le <- vlengths(value, FALSE) nr <- length(.data[[1L]]) if(allv(le, nr)) .data[vars] <- value else if(allv(le, 1L)) .data[vars] <- lapply(value, alloc, nr) else { if(apply) names(value) <- names(.data)[vars] .data <- ftransform_core(.data, value) } return(condalc(`oldClass<-`(.data, clx), any(clx == "data.table"))) } tfmv <- ftransformv settransform <- function(.data, ...) assign(as.character(substitute(.data)), ftransform(.data, ...), envir = parent.frame()) # eval.parent(substitute(.data <- get0("ftransform", envir = getNamespace("collapse"))(.data, ...))) # can use `<-`(.data, ftransform(.data,...)) but not faster .. settfm <- settransform settransformv <- function(.data, vars, FUN, ..., apply = TRUE) assign(as.character(substitute(.data)), ftransformv(.data, vars, FUN, ..., apply = apply), envir = parent.frame()) # eval.parent(substitute(.data <- get0("ftransformv", envir = getNamespace("collapse"))(.data, vars, FUN, ..., apply = apply))) settfmv <- settransformv fcompute_core <- function(.data, e, keep = NULL) { ax <- attributes(.data) nam <- ax[["names"]] if(!length(nam) || fanyDuplicated(nam)) stop("All columns of .data have to be uniquely named") if(length(keep)) { keep <- cols2int(keep, .data, nam, FALSE) if(any(m <- match(names(e), nam[keep], nomatch = 0L))) { temp <- .subset(.data, keep) pos <- m > 0L temp[m[pos]] <- e[pos] e <- c(temp, e[!pos]) } else e <- c(.subset(.data, keep), e) } if(inherits(.data, "sf") && !any(names(e) == attr(.data, "sf_column"))) e <- c(e, .subset(.data, attr(.data, "sf_column"))) ax[["names"]] <- names(e) le <- vlengths(e, FALSE) nr <- fnrow2(.data) rl <- le == nr if(all(rl)) return(condalcSA(e, ax, inherits(.data, "data.table"))) # All computed vectors have the right length if(any(1L < le & !rl)) stop("Lengths of replacements must be equal to nrow(.data) or 1") e[!rl] <- lapply(e[!rl], alloc, nr) return(condalcSA(e, ax, inherits(.data, "data.table"))) } fcompute <- function(.data, ..., keep = NULL) { # within ? if(!is.list(.data)) stop(".data needs to be a list of equal length columns or a data.frame") e <- eval(substitute(list(...)), .data, parent.frame()) if(is.null(names(e)) && length(e) == 1L && is.list(e[[1L]])) e <- unclass(e[[1L]]) # support list input -> added in v1.3.0 return(fcompute_core(.data, e, keep)) } fcomputev <- function(.data, vars, FUN, ..., apply = TRUE, keep = NULL) { if(!is.list(.data)) stop(".data needs to be a list of equal length columns or a data.frame") if(!is.function(FUN)) stop("FUN needs to be a function") vs <- tryCatch(vars, error = function(e) NULL) nam <- attr(.data, "names") if(is.null(vs)) vs <- eval_exp(nam, substitute(vars), parent.frame()) vars <- cols2int(vs, .data, nam, FALSE) if(apply) { value <- `names<-`(.subset(.data, vars), NULL) value <- if(missing(...)) lapply(value, FUN) else eval(substitute(lapply(value, FUN, ...)), .data, parent.frame()) names(value) <- nam[vars] } else { value <- .Call(C_subsetCols, .data, vars, FALSE) value <- if(missing(...)) unclass(FUN(value)) else # unclass needed here ? -> yes for lengths... unclass(eval(substitute(FUN(value, ...)), .data, parent.frame())) } return(fcompute_core(.data, value, keep)) # Note: Need to do this, value could be scalars or vectors } # fmutate fFUN_mutate_add_groups <- function(z) { if(!is.call(z)) return(z) cz <- l1orlst(as.character(z[[1L]])) if(any(cz == .FAST_FUN_MOPS)) { z$g <- quote(.g_) if(any(cz == .FAST_STAT_FUN_POLD) && is.null(z$TRA)) z$TRA <- 1L # if(is.null(z$TRA)) z$TRA <- 1L # z$use.g.names <- FALSE # Not necessary } # This works for nested calls (nothing more required, but need to put at the end..) if(length(z) > 2L || is.call(z[[2L]])) return(as.call(lapply(z, fFUN_mutate_add_groups))) # Need because: mpg - fmean(mpg) z } gsplit_single_apply <- function(x, g, ex, v, encl) copyMostAttributes(unlist(lapply(gsplit(x, g), function(i) eval(ex, `names<-`(list(i), v), encl)), FALSE, FALSE), x) gsplit_multi_apply <- function(x, g, ex, encl) { sx <- seq_along(x) unlist(lapply(gsplit(NULL, g), function(i) eval(ex, .Call(C_subsetDT, x, i, sx, FALSE), encl)), FALSE, FALSE) } othFUN_compute <- function(x) { if(length(x) == 2L) # No additional function arguments return(substitute(unlist(lapply(.gsplit_(a, .g_), b), FALSE, FALSE), list(a = x[[2L]], b = x[[1L]]))) # With more arguments, things become more complex.. lapply_call <- as.call(c(list(quote(lapply), substitute(.gsplit_(a, .g_), list(a = x[[2L]]))), as.list(x[-2L]))) substitute(unlist(a, FALSE, FALSE), list(a = lapply_call, b = x[[2L]])) } keep_v <- function(d, v) copyMostAttributes(null_rm(.subset(d, unique.default(v))), d) acr_get_cols <- function(.cols, d, nam, ce) { # Note: .cols is passed through substitute() before it enters here. Thus only an explicit NULL is NULL up front if(is.null(.cols)) return(if(is.null(d[[".g_"]])) seq_along(nam) else seq_along(nam)[nam %!in% c(".g_", ".gsplit_", d[[".g_"]]$group.vars)]) nl <- `names<-`(as.vector(seq_along(nam), "list"), nam) cols <- eval(.cols, nl, ce) # Needed for programming usage, because you can pass a variable that is null if(is.null(cols)) return(if(is.null(d[[".g_"]])) seq_along(nam) else seq_along(nam)[nam %!in% c(".g_", ".gsplit_", d[[".g_"]]$group.vars)]) return(cols2int(cols, d, nam)) # if(is.integer(cols)) cols else (you are checking against length(cols) in setup_across) } # TODO: Implement for collap() ?? acr_get_funs <- function(.fnsexp, .fns, ce) { if(is.function(.fns)) { namfun <- l1orlst(as.character(.fnsexp)) .fns <- `names<-`(list(.fns), namfun) } else if(is.list(.fns)) { namfun <- names(.fns) # In programming usage, could simply pass a list of functions l, in which case this is not a call.. if(is.call(.fnsexp) && (.fnsexp[[1L]] == quote(list) || .fnsexp[[1L]] == quote(c))) { # or we could have funlist[[i]] which is also sorted out here.. nf <- all.vars(.fnsexp, unique = FALSE) if(length(nf) == length(.fns)) { names(.fns) <- nf if(is.null(namfun)) namfun <- nf } else { nf <- vapply(.fnsexp[-1L], function(x) l1orlst(all.vars(x)), character(1L), USE.NAMES = FALSE) names(.fns) <- nf if(is.null(namfun)) namfun <- as.character(seq_along(.fns)) } } else if(is.null(namfun)) names(.fns) <- namfun <- as.character(seq_along(.fns)) } else if(is.character(.fns)) { namfun <- names(.fns) names(.fns) <- .fns .fns <- lapply(.fns, get, mode = "function", envir = ce) # lapply(.fns, match.fun()) if(is.null(namfun)) namfun <- names(.fns) } else stop(".fns must be a fucntion, list of functions or character vector of function names") return(list(namfun = namfun, funs = .fns)) } fungroup2 <- function(X, ocl) { attr(X, "groups") <- NULL oldClass(X) <- fsetdiff(ocl, c("GRP_df", "grouped_df")) X } setup_across <- function(.cols, .fnsexp, .fns, .names, .apply, .transpose, .FFUN) { pe <- parent.frame(n = 4L) d <- unclass(pe$.data) # Safer to unclass here also... ce <- parent.frame(n = 5L) # Caller environment # return(list(.cols, .fns, .names, d)) nam <- names(d) cols <- acr_get_cols(.cols, d, nam, ce) funs <- acr_get_funs(.fnsexp, .fns, ce) namfun <- funs$namfun fun <- funs$funs if(length(.names) && !is.logical(.names)) { if(is.function(.names)) names <- .names(nam[cols], namfun) else { if(length(.names) != length(namfun) * length(cols)) stop("length(.names) must match length(.fns) * length(.cols)") names <- .names } } else { # Third version: .names = FALSE does nothing. Allows fmutate(mtcars, across(cyl:vs, list(L, D, G), n = 1:3)) # This makes sense, because if .transpose = "auto" and the lengths of generated columns are unequal, you cannot use generated names anyway because they would mismatch.. names <- if((is.null(.names) && length(namfun) == 1L) || (isFALSE(.names) && length(namfun) > 1L)) NULL else if(isFALSE(.names)) # this allows you to force names false for a single function... nam[cols] else if(isFALSE(.transpose)) as.vector(outer(nam[cols], namfun, paste, sep = "_")) else as.vector(t(outer(nam[cols], namfun, paste, sep = "_"))) # Second version: .names = TRUE auto generates names, .names = FALSE yields default names (no change to names by the function), # and .names = NULL (default) yields function names or auto names if multiple functions... # names <- if(is.null(.names) && length(namfun) == 1L) NULL else if(!isFALSE(.names)) # as.vector(t(outer(nam[cols], namfun, paste, sep = "_"))) else if(length(namfun) == 1L) # nam[cols] else stop("Computed columns need to be uniquely named. If .names = FALSE, can only use one function, or need to supply custom names!") # First version: requires .names = FALSE for renaming functions like L, W etc... # names <- if(isFALSE(.names)) NULL else # if(length(namfun) == 1L && !isTRUE(.names)) nam[cols] else # as.vector(t(outer(nam[cols], namfun, paste, sep = "_"))) } if(is.logical(.apply)) { aplvec <- if(.apply) rep_len(TRUE, length(fun)) else rep_len(FALSE, length(fun)) } else { .apply <- switch(.apply, auto = NA, stop(".apply must be 'auto', TRUE or FALSE")) aplvec <- names(fun) %!in% .FFUN } .data_ <- if(all(aplvec)) d[cols] else .Call(C_subsetCols, if(is.null(d[[".g_"]])) `oldClass<-`(d, pe$cld) else fungroup2(d, pe$cld), cols, FALSE) # Note: Keep the order and the names !!! list(data = d, .data_ = .data_, # cols = cols, funs = fun, aplvec = `names<-`(aplvec, names(fun)), ce = ce, names = names) } across <- function(.cols = NULL, .fns, ..., .names = NULL, .apply = "auto", .transpose = "auto") { stop("across() can only work inside fmutate() and fsummarise()") } do_across <- function(.cols = NULL, .fns, ..., .names = NULL, .apply = "auto", .transpose = "auto", .eval_funi, .summ = TRUE) { # nodots <- missing(...) # return(setup_across(substitute(.cols), substitute(.fns), .fns, .names, .apply, .FAST_FUN_MOPS)) setup <- setup_across(substitute(.cols), substitute(.fns), .fns, .names, .apply, .transpose, .FAST_FUN_MOPS) nf <- names(setup$funs) names <- setup$names # return(eval_funi(nf, ...)) # return(lapply(nf, eval_funi, ...)) if(length(nf) == 1L) { res <- .eval_funi(nf, setup[[1L]], setup[[2L]], setup[[3L]], setup[[4L]], setup[[5L]], ...) # eval_funi(nf, aplvec, funs, nodots, .data_, data, ce, ...) # return(res) } else { # motivated by: fmutate(mtcars, across(cyl:vs, list(L, D, G), n = 1:3)) r <- lapply(nf, .eval_funi, setup[[1L]], setup[[2L]], setup[[3L]], setup[[4L]], setup[[5L]], ...) # do.call(lapply, c(list(nf, eval_funi), setup[1:5], list(...))) # lapply(nf, eval_funi, aplvec, funs, nodots, .data_, data, ce, ...) # return(r) if(isFALSE(.transpose) || (is.character(.transpose) && !all_eq(vlengths(r, FALSE)))) { # stop("reached here") res <- unlist(r, FALSE, use.names = TRUE) # need use.names= TRUE here # return(list(res = res, r = r)) } else { res <- unlist(t_list2(r), FALSE, FALSE) if(is.null(names(res)) && is.null(names)) names(res) <- unlist(t_list2(lapply(r, names)), FALSE, FALSE) } } if(.summ) return(if(is.null(names)) res else `names<-`(res, names)) return(`[<-`(setup$data, if(is.null(names)) names(res) else names, value = res)) } mutate_funi_simple <- function(i, data, .data_, funs, aplvec, ce, ...) { # g is unused here... .FUN_ <- funs[[i]] if(aplvec[i]) { value <- if(missing(...)) lapply(unattrib(.data_), .FUN_) else do.call(lapply, c(list(unattrib(.data_), .FUN_), eval(substitute(list(...)), data, ce)), envir = ce) # eval(substitute(lapply(unattrib(.data_), .FUN_, ...)), c(list(.data_ = .data_), data), ce) names(value) <- names(.data_) } else if(any(i == .FAST_STAT_FUN_POLD)) { if(missing(...)) return(unclass(.FUN_(.data_, TRA = 1L))) # Old way: Not necessary to construct call.. return(unclass(eval(as.call(list(as.name(i), quote(.data_), TRA = 1L))))) # faster than substitute(.FUN_(.data_, TRA = 1L), list(.FUN_ = as.name(i))) # if(any(...names() == "TRA")) # This down not work because it substitutes setup[[]] from mutate_across !!! # return(unclass(eval(substitute(.FUN_(.data_, ...)), c(list(.data_ = .data_), data), ce))) # return(unclass(eval(substitute(.FUN_(.data_, ..., TRA = 1L)), c(list(.data_ = .data_), data), ce))) fcal <- as.call(c(list(as.name(i), quote(.data_)), as.list(substitute(list(...))[-1L]))) if(is.null(fcal$TRA)) fcal$TRA <- 1L return(unclass(eval(fcal, c(list(.data_ = .data_), data), ce))) } else { value <- if(missing(...)) .FUN_(.data_) else do.call(.FUN_, c(list(.data_), eval(substitute(list(...)), data, ce)), envir = ce) # Object setup not found: eval(substitute(.FUN_(.data_, ...)), c(list(.data_ = .data_), data), ce) oldClass(value) <- NULL if(any(i == .FAST_FUN_MOPS)) return(value) # small improvement for fast funs... } # return(unclass(r)) # fcal <- if(missing(...)) as.call(list(funs[[i]], quote(.data_))) else # as.call(c(list(funs[[i]], quote(.data_)), as.list(substitute(list(...))[-1L]))) # , parent.frame() # # substitute(list(...), parent.frame()) # # substitute(FUN(.data_, ...), list(FUN = funs[[i]], ...)) # # as.call(substitute(list(funs[[i]], quote(.data_), ...))) # # substitute(FUN(.data_, ...), list(FUN = funs[[i]])) # # if(any(i == .FAST_STAT_FUN_POLD) && is.null(fcal$TRA)) fcal$TRA <- 1L # fast functions have a data.frame method, thus can be applied simultaneously to all columns # return(fcal) # return(eval(fcal, c(list(.data_ = .data_), data), setup$ce)) lv <- vlengths(value, FALSE) nr <- length(data[[1L]]) if(all(lv == nr)) return(value) if(all(lv == 1L)) return(lapply(value, alloc, nr)) stop("Without groups, NROW(value) must either be 1 or nrow(.data)") } dots_apply_grouped <- function(d, g, f, dots) { attributes(d) <- NULL n <- length(d[[1L]]) if(any(ln <- vlengths(dots, FALSE) == n)) { ln <- which(ln) if(length(ln) > 1L) { # multiple arguments to be split asl <- lapply(dots[ln], gsplit, g) if(length(dots) > length(ln)) { mord <- dots[-ln] FUN <- function(x) do.call(mapply, c(list(f, gsplit(x, g), SIMPLIFY = FALSE, USE.NAMES = FALSE, MoreArgs = mord), asl)) } else FUN <- function(x) do.call(mapply, c(list(f, gsplit(x, g), SIMPLIFY = FALSE, USE.NAMES = FALSE), asl)) } else { # Only one argument to be split nam <- names(dots) as <- gsplit(dots[[ln]], g) FUN <- quote(function(x) mapply(f, gsplit(x, g), SIMPLIFY = FALSE, USE.NAMES = FALSE)) FUN[[3L]][[if(length(nam) && nzchar(nam[ln])) nam[ln] else 6L]] <- quote(as) if(length(dots) > 1L) { mord <- dots[-ln] FUN[[3L]]$MoreArgs <- quote(mord) } FUN <- eval(FUN) } return(lapply(d, function(y) copyMostAttributes(unlist(FUN(y), FALSE, FALSE), y))) } # No arguments to be split: do.call(lapply, c(list(d, copysplaplfun, g, f), dots)) } dots_apply_grouped_bulk <- function(d, g, f, dots) { n <- fnrow2(d) dsp <- rsplit.data.frame(d, g, simplify = FALSE, flatten = TRUE, use.names = FALSE) if(is.null(dots)) return(lapply(dsp, f)) if(any(ln <- vlengths(dots, FALSE) == n)) { ln <- which(ln) if(length(ln) > 1L) { # multiple arguments to be split asl <- lapply(dots[ln], gsplit, g) return(do.call(mapply, c(list(f, dsp, SIMPLIFY = FALSE, USE.NAMES = FALSE, MoreArgs = if(length(dots) > length(ln)) dots[-ln] else NULL), asl))) } else { # Only one argument to be split nam <- names(dots) as <- gsplit(dots[[ln]], g) FUN <- quote(mapply(f, dsp, SIMPLIFY = FALSE, USE.NAMES = FALSE)) FUN[[if(length(nam) && nzchar(nam[ln])) nam[ln] else 6L]] <- quote(as) if(length(dots) > 1L) { mord <- dots[-ln] FUN$MoreArgs <- quote(mord) } return(eval(FUN)) } } # No arguments to be split: do.call(lapply, c(list(dsp, f), dots)) } mutate_funi_grouped <- function(i, data, .data_, funs, aplvec, ce, ...) { g <- data[[".g_"]] .FUN_ <- funs[[i]] apli <- aplvec[i] if(apli) { value <- if(missing(...)) lapply(unattrib(.data_), copysplaplfun, g, .FUN_) else dots_apply_grouped(.data_, g, .FUN_, eval(substitute(list(...)), data, ce)) # Before: do.call(lapply, c(list(unattrib(.data_), copysplaplfun, g, .FUN_), eval(substitute(list(...)), data, ce)), envir = ce) } else if(any(i == .FAST_STAT_FUN_POLD)) { if(missing(...)) return(unclass(.FUN_(.data_, g = g, TRA = 1L))) fcal <- as.call(c(list(as.name(i), quote(.data_), g = quote(.g_)), as.list(substitute(list(...))[-1L]))) if(is.null(fcal$TRA)) fcal$TRA <- 1L return(unclass(eval(fcal, c(list(.data_ = .data_), data), ce))) } else if(any(i == .FAST_FUN_MOPS)) { if(any(i == .OPERATOR_FUN)) { value <- if(missing(...)) .FUN_(.data_, by = g) else do.call(.FUN_, c(list(.data_, by = g), eval(substitute(list(...)), data, ce)), envir = ce) } else { value <- if(missing(...)) .FUN_(.data_, g = g) else do.call(.FUN_, c(list(.data_, g = g), eval(substitute(list(...)), data, ce)), envir = ce) } oldClass(value) <- NULL return(value) } else { # stop("In grouped computations, .apply = FALSE only works with .FAST_FUN and .OPERATOR_FUN") value <- dots_apply_grouped_bulk(.data_, g, .FUN_, if(missing(...)) NULL else eval(substitute(list(...)), data, ce)) value <- .Call(C_rbindlist, unclass(value), FALSE, FALSE, NULL) oldClass(value) <- NULL } lv <- vlengths(value, FALSE) nr <- length(data[[1L]]) if(all(lv == nr)) { # Improve efficiency here?? if(!isTRUE(g$ordered[2L])) value <- lapply(value, greorder, g) if(apli) names(value) <- names(.data_) return(value) } if(!all(lv == g[[1L]])) stop("With groups, NROW(value) must either be ng or nrow(.data)") if(apli) names(value) <- names(.data_) return(.Call(C_subsetDT, value, g[[2L]], seq_along(value), FALSE)) } do_grouped_expr <- function(ei, eiv, .data, g, pe) { v <- all.vars(ei, unique = FALSE) if(length(v) > 1L) { # Could include global environemntal variables e.g. fmutate(data, new = mean(var) + q) namd <- names(.data) if(length(wv <- na_rm(match(v, namd))) > 1L) return(gsplit_multi_apply(.data[wv], g, ei, pe)) return(gsplit_single_apply(.data[[wv]], g, ei, namd[wv], pe)) } if(length(eiv) == 2L) return(copyMostAttributes(eval(othFUN_compute(ei), .data, pe), .data[[v]])) gsplit_single_apply(.data[[v]], g, ei, v, pe) } # TODO: Preserves attributes ?? what about ftransform?? fmutate <- function(.data, ..., .keep = "all") { if(!is.list(.data)) stop(".data needs to be a list of equal length columns or a data.frame") e <- substitute(list(...)) nam <- names(e) nullnam <- is.null(nam) # if(!length(nam)) stop("All replacement expressions have to be named") pe <- parent.frame() cld <- oldClass(.data) # This needs to be called cld, because across fetches it from here !! oldClass(.data) <- NULL nr <- length(.data[[1L]]) namdata <- names(.data) if(is.null(namdata) || fanyDuplicated(namdata)) stop("All columns of .data have to be uniquely named") if(!is.character(.keep)) .keep <- cols2char(.keep, .data, namdata) # allowing .keep to be NULL gdfl <- any(cld == "grouped_df") if(gdfl) { g <- GRP.grouped_df(.data, return.groups = FALSE, call = FALSE) .data[[".g_"]] <- g .data[[".gsplit_"]] <- gsplit for(i in 2:length(e)) { ei <- e[[i]] if(nullnam || nam[i] == "") { # Across if(ei[[1L]] != quote(across) && ei[[1L]] != quote(acr)) stop("expressions need to be named or start with across(), or its shorthand acr().") ei[[1L]] <- quote(do_across) ei$.eval_funi <- quote(mutate_funi_grouped) ei$.summ <- FALSE # return(eval(ei, enclos = pe)) .data <- eval(ei, list(do_across = do_across, mutate_funi_grouped = mutate_funi_grouped), pe) # ftransform_core(.data, eval(ei, pe)) } else { # Tagged vector expressions if(is.null(ei)) { .data[[nam[i]]] <- NULL next } eiv <- all.names(ei) if(any(eiv %in% .FAST_FUN_MOPS)) { .data[[nam[i]]] <- eval(fFUN_mutate_add_groups(ei), .data, pe) } else { r <- do_grouped_expr(ei, eiv, .data, g, pe) .data[[nam[i]]] <- if(length(r) == g[[1L]]) .Call(C_subsetVector, r, g[[2L]], FALSE) else # .Call(Cpp_TRA, .data[[v]], r, g[[2L]], 1L) # Faster than simple subset r[g[[2L]] ??] greorder(r, g) # r[forder.int(forder.int(g[[2L]]))] # Seems twice is necessary... } } } .data[c(".g_", ".gsplit_")] <- NULL } else { # Without groups... for(i in 2:length(e)) { # This is good and very fast ei <- e[[i]] if(nullnam || nam[i] == "") { # Across if(ei[[1L]] != quote(across) && ei[[1L]] != quote(acr)) stop("expressions need to be named or start with across(), or its shorthand acr().") ei[[1L]] <- quote(do_across) ei$.eval_funi <- quote(mutate_funi_simple) ei$.summ <- FALSE # return(eval(ei, enclos = pe)) .data <- eval(ei, list(do_across = do_across, mutate_funi_simple = mutate_funi_simple), pe) # ftransform_core(.data, eval(ei, enclos = pe)) } else { # Tagged vector expressions r <- eval(ei, .data, pe) if(!is.null(r)) { # don't use length(), because only NULL removes list elements... if(length(r) == 1L) r <- alloc(r, nr) else if(length(r) != nr) stop("length mismatch") } .data[[nam[i]]] <- r } } } # Implementing .keep argument # TODO: Implement .keep with across... .data <- if(length(.keep) > 1L) keep_v(.data, c(.keep, nam[-1L])) else switch(.keep, all = .data, used = keep_v(.data, c(namdata[namdata %in% c(if(gdfl) g$group.vars, unlist(lapply(e[-1L], all.vars), FALSE, FALSE), nam[-1L])], nam[-1L])), unused = keep_v(.data, c(namdata[namdata %in% c(if(gdfl) g$group.vars, fsetdiff(namdata, unlist(lapply(e[-1L], all.vars), FALSE, FALSE)), nam[-1L])], nam[-1L])), none = keep_v(.data, c(if(gdfl) g$group.vars, nam[-1L])), # g$group.vars[g$group.vars %!in% nam[-1L]] -> inconsistent and inefficient... keep_v(.data, c(.keep, nam[-1L]))) oldClass(.data) <- cld return(condalc(.data, any(cld == "data.table"))) } # or mut / mte? () If you need o choose a vowel, u is more distinctive, lut for consistency let's stock with consonants mtt <- fmutate # Note: see if function(.data, ...) fmutate(.data, ...) is possible (what about objects in global environment?) # OLD versions and experimental stuff: # fssm <- function(x, subset) { # not faster than native [ !! # ax <- attributes(x) # d <- dim(x) # ax[["dimnames"]][[1L]] <- ax[["dimnames"]][[1L]][subset] # ax[["dim"]] <- c(length(subset), d[2L]) # ic <- seq_len(d[2L]) * d[1L] - d[1L] # setAttributes(.Call(C_subsetVector, x, outer(subset, ic, FUN = "+"), TRUE), ax) # } # Older version: But classes for [ can also be very useful for certain objects !! # fsubset.matrix <- function(x, subset, select, drop = FALSE, ...) { # if(!missing(...)) stop("Unknown argument ", dotstostr(...)) # if(missing(select)) { # if(is.object(x)) return(`oldClass<-`(unclass(x)[subset, , drop = drop], oldClass(x))) else # return(x[subset, , drop = drop]) # } else { # nl <- as.vector(1L:ncol(x), "list") # names(nl) <- dimnames(x)[[2L]] # vars <- eval(substitute(select), nl, parent.frame()) # if(is.object(x)) { # if(missing(subset)) return(`oldClass<-`(unclass(x)[, vars, drop = drop], class(x))) else # return(`oldClass<-`(unclass(x)[subset, vars, drop = drop], oldClass(x))) # } else { # if(missing(subset)) return(x[, vars, drop = drop]) else # return(x[subset, vars, drop = drop]) # } # } # } # older version -> more like base::subset # fsubset.data.frame <- function(x, subset, select, ...) { # if(!missing(...)) stop("Unknown argument ", dotstostr(...)) # if(missing(select)) vars <- seq_along(unclass(x)) else { # nl <- `names<-`(as.vector(seq_along(unclass(x)), "list"), attr(x, "names")) # vars <- eval(substitute(select), nl, parent.frame()) # if(!is.integer(vars)) vars <- if(is.character(vars)) ckmatch(vars, names(nl)) else which(vars) # } # Best solution ?? # if(missing(subset)) return(colsubset(x, vars)) else { # if(is.atomic(subset)) # rep_len(TRUE, length(x[[1L]])) else { # r <- eval(substitute(subset), x, parent.frame()) # # e <- substitute(subset) # if(e[[1L]] == ":") ... but what about objects? -> just keep this !! # if(is.logical(r)) r <- which(r) # which(r & !is.na(r)) is.na not needed !! # } # improve qDF !!! # rn <- attr(x, "row.names") # || is.integer(rn) # maybe many have character converted integers ?? # if(is.numeric(rn) || is.null(rn) || rn[1L] == "1") return(.Call(C_subsetDT, x, r, vars, TRUE)) # return(`attr<-`(.Call(C_subsetDT, x, r, vars, TRUE), "row.names", rn[r])) # fast ?? scalable ?? # } # transform(mtcars, newc = cyl > 5, bla = cyl > 3) # See also with and within. What about keeping attributes ?? collapse/R/list_functions.R0000644000176200001440000003243714167362265015470 0ustar liggesusersrapply2d <- function(l, FUN, ..., classes = "data.frame") { aply2d <- function(y) if(is.list(y) && !inherits(y, classes)) lapply(y, aply2d) else FUN(y, ...) # is.null(dim(y)) # qsu output shows list of DF can have dim attr. aply2d(l) # lapply(x,aply2d) # if this is enabled, rapply2d takes apart data.frame if passed } get_elem_FUN <- function(x, FUN, return = "sublist", keep_class = FALSE) switch(return, sublist = if(keep_class) fcolsubset(x, vapply(`attributes<-`(x, NULL), FUN, TRUE)) else .subset(x, vapply(`attributes<-`(x, NULL), FUN, TRUE)), names = attr(x, "names")[vapply(`attributes<-`(x, NULL), FUN, TRUE)], indices = which(vapply(`attributes<-`(x, NULL), FUN, TRUE)), named_indices = which(`names<-`(vapply(`attributes<-`(x, NULL), FUN, TRUE), attr(x, "names"))), logical = vapply(`attributes<-`(x, NULL), FUN, TRUE), named_logical = `names<-`(vapply(`attributes<-`(x, NULL), FUN, TRUE), attr(x, "names")), stop("Unknown return option!")) list_elem <- function(l, return = "sublist", keep.class = FALSE) get_elem_FUN(l, is.list, return, keep.class) atomic_elem <- function(l, return = "sublist", keep.class = FALSE) get_elem_FUN(l, is.atomic, return, keep.class) "list_elem<-" <- function(l, value) { al <- attributes(l) ilv <- is.list(value) len <- if(ilv) length(value) else 1L attributes(l) <- NULL # vapply without attributes is faster ! ind <- which(vapply(l, is.list, TRUE)) if(len != length(ind)) stop("length(value) must match length(list_elem(l))") if(ilv) l[ind] <- value else l[[ind]] <- value if(ilv && length(nam <- names(value))) al[["names"]][ind] <- nam setAttributes(l, al) } "atomic_elem<-" <- function(l, value) { al <- attributes(l) ilv <- is.list(value) len <- if(ilv) length(value) else 1L attributes(l) <- NULL ind <- which(vapply(l, is.atomic, TRUE)) if(len != length(ind)) stop("length(value) must match length(list_elem(l))") if(ilv) l[ind] <- value else l[[ind]] <- value if(ilv && length(nam <- names(value))) al[["names"]][ind] <- nam setAttributes(l, al) } is_regular <- function(x) is.list(x) || is.atomic(x) # fastest way? is.regular <- function(x) { .Deprecated(msg = "is.regular is depreciated, see help('collapse-depreciated')") is.list(x) || is.atomic(x) } is_unlistable <- function(l, DF.as.list = FALSE) if(DF.as.list) all(unlist(rapply(l, is.atomic, how = "list"), use.names = FALSE)) else all(unlist(rapply2d(l, is_regular), use.names = FALSE)) # fastest way? is.unlistable <- function(l, DF.as.list = FALSE) { message("Note that 'is.unlistable' was renamed to 'is_unlistable'. It will not be removed anytime soon, but please use updated function names in new code, see help('collapse-renamed')") is_unlistable(l, DF.as.list) } # If data.frame, search all, otherwise, make optional counting df or not, but don't search them. ldepth <- function(l, DF.as.list = FALSE) { if (inherits(l, "data.frame")) { # fast defining different functions in if-clause ? ld <- function(y,i) if(is.list(y)) lapply(y,ld,i+1L) else i } else if(DF.as.list) { ld <- function(y,i) { df <- inherits(y, "data.frame") if(is.list(y) && !df) lapply(y,ld,i+1L) else i+df } } else { ld <- function(y,i) if(is.list(y) && !inherits(y, "data.frame")) lapply(y,ld,i+1L) else i } base::max(unlist(ld(l, 0L), use.names = FALSE)) } has_elem <- function(l, elem, recursive = TRUE, DF.as.list = FALSE, regex = FALSE, ...) { if(is.function(elem)) { if(!missing(...)) unused_arg_action(match.call(), ...) if(recursive) { if(DF.as.list) return(any(unlist(rapply(l, elem, how = "list"), use.names = FALSE))) return(any(unlist(rapply2d(l, elem), use.names = FALSE))) } return(any(vapply(l, elem, TRUE, USE.NAMES = FALSE))) } else if(is.character(elem)) { if(!regex && !missing(...)) unused_arg_action(match.call(), ...) if(recursive) { oldClass(l) <- NULL # in case [ behaves weird is.subl <- if(DF.as.list) is.list else function(x) is.list(x) && !inherits(x, "data.frame") # could do without, but it seems to remove data.frame attributes, and more speed! namply <- function(y) if(any(subl <- vapply(y, is.subl, TRUE))) c(names(subl), unlist(lapply(.subset(y, subl), namply), use.names = FALSE)) else names(y) # also overall subl names are important, and .subset for DT subsetting ! # names(which(!subl)) # names(y)[!subl] # which is faster? if(regex) return(length(rgrep(elem, namply(l), ...)) > 0L) else return(any(namply(l) %in% elem)) } else if(regex) return(length(rgrep(elem, names(l), ...)) > 0L) else return(any(names(l) %in% elem)) } else stop("elem must be a function or character vector of element names or regular expressions") } # Experimental: # elem_names <- function(l, how = c("list", "unlist"), DF.as.list = TRUE) { # need right order for method how = list !! # namply <- function(y) if(any(subl <- vapply(y, is.subl, TRUE))) c(names(subl), lapply(.subset(y, subl), namply)) else names(subl) # switch(how[1L], # unlist = names(rapply(l, function(x) NA)), # list = # ) rapply(l, function(x) NULL) # # } # General note: What about lists containing data.tables ? '[' subsetting will be wrong ! list_extract_FUN <- function(l, FUN, is.subl, keep.tree = FALSE) { regsearch <- function(x) { if(any(subl <- vapply(x, is.subl, TRUE, USE.NAMES = FALSE))) { # is.list(x) && a wsubl <- which(subl) wnsubl <- whichv(subl, FALSE) matches <- vapply(x[wnsubl], FUN, TRUE, USE.NAMES = FALSE) a <- lapply(x[wsubl], regsearch) wa <- vlengths(a, FALSE) > 0L # note that this also gets rid of null elements! could make it length or is.null! # vapply(a, length, 1L, USE.NAMES = FALSE) x <- c(x[wnsubl][matches], a[wa]) # The problem here: If all elements in a sublist are atomic, it still retains the sublist itself with NULL inside! if(keep.tree || length(x) != 1L) return(x[forder.int(c(wnsubl[matches], wsubl[wa]))]) else return(x[[1L]]) # fastest way? } else if(length(x)) { # This ensures correct behavior in the final nodes: if (length(x)) because problem encountered in get.elem(V, is.matrix) -> empty xlevels list, the lapply below does not execute matches <- which(vapply(x, FUN, TRUE, USE.NAMES = FALSE)) if(keep.tree || length(matches) != 1L) return(x[matches]) else return(x[[matches]]) # needs to be != } } regsearch(l) } list_extract_regex <- function(l, exp, is.subl, keep.tree = FALSE, ...) { regsearch <- function(x) { if(any(subl <- vapply(x, is.subl, TRUE, USE.NAMES = FALSE))) { matches <- rgrepl(exp, names(x), ...) wres <- which(matches) # wres <- rgrep(exp, names(x), ...) wnressubl <- if(length(wres)) which(subl & !matches) else which(subl) # fsetdiff(which(subl), wres) if(length(wnressubl)) { # faster way? a <- lapply(x[wnressubl], regsearch) # is this part still necessary?, or only for keep.tree wa <- vlengths(a, FALSE) > 0L # note that this also gets rid of null elements!! could make it length or is.null!, length is better for length 0 lists !! # vapply(a, length, 1L) x <- c(x[wres], a[wa]) if(keep.tree || length(x) != 1L) return(x[forder.int(c(wres, wnressubl[wa]))]) else return(x[[1L]]) } else if(keep.tree || length(wres) != 1L) return(x[wres]) else return(x[[wres]]) } else { # This ensures correct behavior in the final nodes: matches <- rgrep(exp, names(x), ...) if(keep.tree || length(matches) != 1L) return(x[matches]) else return(x[[matches]]) # needs to be != } } regsearch(l) } list_extract_names <- function(l, nam, is.subl, keep.tree = FALSE) { regsearch <- function(x) { if(any(subl <- vapply(x, is.subl, TRUE, USE.NAMES = FALSE))) { matches <- names(x) %in% nam wres <- which(matches) # match(nam, names(x), 0L) # better bcause gives integer(0) -> necessary as cannot do l[[0L]] wnressubl <- if(length(wres)) which(subl & !matches) else which(subl) # fsetdiff(which(subl), wres) # old solution: faster but does not work well if parent list is unnamed ! (i.e. l = list(lm1, lm1)) if(length(wnressubl)) { a <- lapply(x[wnressubl], regsearch) wa <- vlengths(a, FALSE) > 0L # vapply(a, length, 1L) x <- c(x[wres], a[wa]) if(keep.tree || length(x) != 1L) return(x[forder.int(c(wres, wnressubl[wa]))]) else return(x[[1L]]) } else if(keep.tree || length(wres) != 1L) return(x[wres]) else return(x[[wres]]) } else { matches <- which(names(x) %in% nam) if(keep.tree || length(matches) != 1L) return(x[matches]) else return(x[[matches]]) # needs to be !=, because interger(0) goes in first.. } } regsearch(l) } # Idea: Also use indices and logical vectors ? i.e. get first two columns of alist of data.frames ? # This behaves a bit differently (not find elements everywhere, but also subset inside the list) list_extract_ind <- function(l, ind, is.subl, keep.tree = FALSE) { if(is.logical(ind)) ind <- which(ind) if(length(ind) > 1L || keep.tree) { regsearch <- function(x) if(is.subl(x)) lapply(x, regsearch) else x[ind] } else { regsearch <- function(x) if(is.subl(x)) lapply(x, regsearch) else x[[ind]] } regsearch(l) } # Note: all functions currently remove empty list elements ! # keep.tree argument still issues wih xlevels get_elem <- function(l, elem, recursive = TRUE, DF.as.list = FALSE, keep.tree = FALSE, keep.class = FALSE, regex = FALSE, ...) { if(recursive) { # possibly if is.list(x) is redundant, because you check above! -> nah, recursive? is.subl <- if(DF.as.list) is.list else function(x) is.list(x) && !inherits(x, "data.frame") # could do without, but it seems to remove data.frame attributes if(keep.class) al <- attributes(l) # cll <- class(l) # perhaps generalize to other attributes? if(is.function(elem)) { if(!missing(...)) unused_arg_action(match.call(), ...) l <- list_extract_FUN(l, elem, is.subl, keep.tree) } else if(is.character(elem)) { if(regex) l <- list_extract_regex(l, elem, is.subl, keep.tree, ...) else { if(!missing(...)) unused_arg_action(match.call(), ...) l <- list_extract_names(l, elem, is.subl, keep.tree) } } else { if(!missing(...)) unused_arg_action(match.call(), ...) l <- list_extract_ind(l, elem, is.subl, keep.tree) } if(keep.class && is.list(l)) { al[["names"]] <- names(l) return(setAttributes(l, al)) # class(l) <- cll # when drop.tree is proper, l might not be a list } else return(l) } else { if(is.function(elem)) { if(!missing(...)) unused_arg_action(match.call(), ...) elem <- which(vapply(l, elem, TRUE, USE.NAMES = FALSE)) } else if(is.character(elem)) { if(regex) elem <- rgrep(elem, names(l), ...) else { if(!missing(...)) unused_arg_action(match.call(), ...) elem <- which(names(l) %in% elem) } } else if(is.logical(elem)) { if(!missing(...)) unused_arg_action(match.call(), ...) elem <- which(elem) # else stop("elem must be a function, character vector or vector of regular expressions!") } if(keep.tree || length(elem) != 1L) { if(keep.class) return(fcolsubset(l, elem)) else return(.subset(l, elem)) # <- # base::Filter(elem, l) } else return(.subset2(l, elem)) } } # there is base::getElement reg_elem <- function(l, recursive = TRUE, keep.tree = FALSE, keep.class = FALSE) { if(keep.class) al <- attributes(l) # if(inherits(l, "data.frame")) if(keep.class) return(l) else return(unattrib(l)) if(recursive) { is.subl <- function(x) is.list(x) && !inherits(x, "data.frame") l <- list_extract_FUN(l, is_regular, is.subl, keep.tree) if(keep.class && is.list(l)) { al[["names"]] <- names(l) return(setAttributes(l, al)) } else return(l) } else { matches <- which(vapply(l, is_regular, TRUE, USE.NAMES = FALSE)) # l <- base::Filter(is_regular,l) if(keep.tree || length(matches) != 1L) { if(keep.class) return(fcolsubset(l, matches)) else return(.subset(l, matches)) } else return(.subset2(l, matches)) } } irreg_elem <- function(l, recursive = TRUE, keep.tree = FALSE, keep.class = FALSE) { is.irregular <- function(x) !(is.list(x) || is.atomic(x)) # is.irregular fastest way? if(keep.class) al <- attributes(l) # if(inherits(l, "data.frame")) stop("A data.frame is a regular object!") if(recursive) { is.subl <- function(x) is.list(x) && !inherits(x, "data.frame") l <- list_extract_FUN(l, is.irregular, is.subl, keep.tree) if(keep.class && is.list(l)) { al[["names"]] <- names(l) return(setAttributes(l, al)) } else return(l) } else { matches <- which(vapply(l, is.irregular, TRUE, USE.NAMES = FALSE)) # l <- base::Filter(is_regular,l) if(keep.tree || length(matches) != 1L) { if(keep.class) return(fcolsubset(l, matches)) else return(.subset(l, matches)) } else return(.subset2(l, matches)) } } # TODO: See about big objects! #microbenchmark(all(rapply(lm,is.atomic)),!is.list(unlist(lm, use.names = FALSE)),all(unlist(rapply2d(lm,is.std), use.names = FALSE))) #microbenchmark(all(rapply(GGDC,is.atomic)),!is.list(unlist(GGDC, use.names = FALSE)),all(unlist(rapply2d(GGDC,is.std), use.names = FALSE))) collapse/R/unlist2d.R0000644000176200001440000002457114167332054014162 0ustar liggesusers unlist2d <- function(l, idcols = ".id", row.names = FALSE, recursive = TRUE, id.factor = FALSE, DT = FALSE) { if (!is.list(l)) return(l) # stop("l is not a list") makeids <- !isFALSE(idcols) if(makeids) id.names <- if(isTRUE(idcols)) ".id" else idcols[1L] keeprn <- !isFALSE(row.names) if(keeprn) row.names <- switch(row.names, `TRUE` = "row.names", row.names) idfac <- !isFALSE(id.factor) if(idfac) fcclass <- switch(id.factor, `TRUE` = "factor", ordered = c("ordered", "factor"), stop('id.factor needs to be FALSE, TRUE or "ordered"')) DATAclass <- if(DT) c("data.table", "data.frame") else "data.frame" DFDTl <- function(l) { attr(l, "row.names") <- .set_row_names(length(.subset2(l, 1L))) `oldClass<-`(l, DATAclass) } idf <- function(x) if(inherits(x, "data.frame")) 2L else if (!length(x)) 1L else 3L*is.atomic(x) # was if(is.null(x)) 1L -> disregards empty list, bug reported # faster way ? : This is not faster: 2L*inherits(x, "data.frame") + is.null(x) + 3L*is.atomic(x) addrn <- function(x) if(any(attr(x, "names") == row.names)) x else c(`names<-`(list(attr(x, "row.names")), row.names), x) # faster way ? attol <- function(x) { # class(x) <- NULL # tables are also arrays, although only 1D, not because of the class but because they have a dimension attribute. if (length(d <- dim(x)) > 1L) { # is.array(x) # length could also be 0... not NULL if (length(d) > 2L) { # breaking down HDA dn <- dimnames(x) dim(x) <- c(d[1L], bprod(d[-1L])) if (length(dn)) { for (i in 2L:length(d)) if(is.null(dn[[i]])) dn[[i]] <- seq_len(d[i]) dimnames(x) <- list(dn[[1L]], interact_names(dn[-1L])) # Good ? } } if(keeprn) { dn <- dimnames(x) x <- `names<-`(c(list(if(is.null(dn[[1L]])) seq_len(d[1L]) else dn[[1L]]), .Call(Cpp_mctl, x, FALSE, 0L)), c(row.names, dn[[2L]])) } else x <- .Call(Cpp_mctl, x, TRUE, 0L) } else x <- as.vector(x, "list") if (is.null(names(x))) names(x) <- paste0("V", seq_along(x)) # it seems this is not yet working for all (i.e. model objects..), also perhaps not start at V1, depending on what other columsn there are.. i.e start at the right position ? return(x) } ul2d <- function(y) { if(inherits(y, "data.frame") || is.atomic(y)) return(y) oldClass(y) <- NULL # perhaps unclassing y would put more safety ? -> yes ! ident <- vapply(`attributes<-`(y, NULL), idf, 1L) # removes names ? if(is.list(y) && all(ident > 0L)) { if(any(at <- ident == 3L)) y[at] <- lapply(y[at], attol) if(keeprn && any(df <- ident == 2L)) y[df] <- lapply(y[df], addrn) # better cbind for data.table ? or x[["row.names"]] =.. and the sort later ? if(makeids) { if(idfac) { y <- y[ident != 1L] # better way ? y[ident!=1L] = NULL ? nam <- names(y) if(length(nam)) names(y) <- NULL else nam <- as.character(seq_along(y)) y <- DFDTl(.Call(C_rbindlist, y, TRUE, TRUE, id.names)) setattributes(.subset2(y, 1L), pairlist(levels = nam, class = fcclass)) return(y) } else return(DFDTl(.Call(C_rbindlist, y[ident != 1L], TRUE, TRUE, id.names))) } else return(DFDTl(.Call(C_rbindlist, y[ident != 1L], TRUE, TRUE, NULL))) } else lapply(y, ul2d) } l <- ul2d(l) if(recursive) { while(!inherits(l, "data.frame")) l <- ul2d(l) if(makeids) { nams <- attr(l, "names") ids <- whichv(nams, id.names) nid <- length(ids) if(nid > 1L) { nids <- seq_len(nid) attr(l, "names")[ids] <- if(length(idcols) == nid) idcols else paste(id.names, nids, sep = ".") if(keeprn) { rn <- whichv(nams, row.names) # with more id's, row.names are automatically generated from the sub-data.frames.. if(!all(ids == nids) || rn != nid + 1L) .Call(C_setcolorder, l, c(ids, rn, seq_along(nams)[-c(ids, rn)])) } else if (!all(ids == nids)) .Call(C_setcolorder, l, c(ids, seq_along(nams)[-ids])) } else if(keeprn) { # makes sure row.names comes after ids, even if only one id! rn <- whichv(nams, row.names) # length(rn) needed when only vectors... no row names column... if(length(rn) && rn != 2L) .Call(C_setcolorder, l, c(ids, rn, seq_along(nams)[-c(ids, rn)])) } } else if (keeprn) { nams <- attr(l, "names") rn <- whichv(nams, row.names) if(length(rn) && rn != 1L) .Call(C_setcolorder, l, c(rn, seq_along(nams)[-rn])) } if(DT) return(alc(l)) } # attr(l, ".internal.selfref") <- NULL l } # Examples: # # example: # l = lapply(split(iris[1:4], iris[5]), function(x)list(N = fnobs(x), mean = fmean(x), sd = fsd(x))) # # # neat example: # l = list(a = mtcars[1:8],b = list(c = mtcars[4:11], d = list(e = mtcars[2:10]))) # unlist2d(rapply2d(l,colMeans), recursive = FALSE) # unlist2d(rapply2d(l,colMeans)) # now error again!!! # unlist2d(l) # unlist2d(rapply2d(l,dapply,sd)) # # unlist2d(split(iris,iris["Species"])) # # nl = lapply(split(mtcars,mtcars[2]),function(x)split(x,x["vs"])) # str(nl) # View(unlist2d(nl)) # View(unlist2d(nl, idcols = FALSE)) # View(unlist2d(nl, idcols = c(".cyl",".vs"))) # str(unlist2d(nl, recursive = FALSE)) # why is .id a character string?? -> names!! # # # Neat example: # # list.elem(IRF) %>% rapply2d(colSums) %>% unlist2d(c("type","shock")) %>% filter(type == "irf") %>% num.vars %>% dapply(function(x)bsum(abs(x)),MARGIN = 1) # # unlist2d(qsu(mtcars,~cyl,~vs, data.out = TRUE)) # not dim, but is.data.frame # # unlist2d(rapply2d(reg.elem(SV),dim)) # error!! lots of nested NULL's .. doesn't perform # # UNTIL collapse 1.2.1 ... now better row.names handling, and some comments removed ... # unlist2d <- function(l, idcols = ".id", row.names = FALSE, recursive = TRUE, id.factor = FALSE, DT = FALSE) { # # if (!is.list(l)) return(l) # stop("l is not a list") # make.ids <- !isFALSE(idcols) # if(make.ids) id.names <- if(isTRUE(idcols)) ".id" else idcols[1L] # keep.row.names <- !isFALSE(row.names) # if(keep.row.names) row.names <- if(isTRUE(row.names)) "row.names" else row.names[1L] # # DFDTl <- function(l) { # attr(l, "row.names") <- .set_row_names(length(l[[1L]])) # class(l) <- if(DT) c("data.table", "data.frame") else "data.frame" # l # } # idf <- function(x) if(inherits(x, "data.frame")) 2L else if (is.null(x)) 1L else 3L*is.atomic(x) # faster way ? # addrn <- function(x) if(any(attr(x, "names") == row.names)) x else c(`names<-`(list(attr(x, "row.names")), row.names),x) # faster way ? # attol <- function(x) { # # class(x) <- NULL # tables are also arrays, although only 1D, not because of the class but because they have a dimension attribute. # if (length(d <- dim(x)) > 1L) { # is.array(x) # length could also be 0... # # d <- dim(x) # if (length(d) > 2L) { # breaking down HDA # dn <- dimnames(x) # dim(x) <- c(d[1L], bprod(d[-1L])) # if (length(dn)) { # for (i in 2L:length(d)) if(is.null(dn[[i]])) dn[[i]] <- seq_len(d[i]) # dimnames(x) <- list(dn[[1L]], interact_names(dn[-1L])) # Good ? # } # } # dn <- dimnames(x) # if(keep.row.names && length(dn[[1L]])) # x <- `names<-`(c(list(dn[[1L]]), .Call(Cpp_mctl, x, FALSE, 0L)), c(row.names, dn[[2L]])) else # x <- `names<-`(.Call(Cpp_mctl, x, FALSE, 0L), dn[[2L]]) # } else x <- as.vector(x, "list") # if (is.null(names(x))) names(x) <- paste0("V", seq_along(x)) # it seems this is not yet working for all (i.e. model objects..), also perhaps not start at V1, depending on what other columsn there are.. i.e start at the right position ? # return(x) # } # ul2d <- function(y) { # if(inherits(y, "data.frame") || is.atomic(y)) return(y) # class(y) <- NULL # perhaps unclassing y would put more safety ? -> yes ! # ident <- vapply(y, idf, 1L) # `attributes<-`(y, NULL) # possibly you can still get a few microseconds in the apply commands, but beware, this removes names in output ! # if(is.list(y) && all(ident > 0L)) { # at <- ident == 3L # if(any(at)) y[at] <- lapply(y[at], attol) # if(keep.row.names && any(df <- ident == 2L)) y[df] <- lapply(y[df], addrn) # better cbind for data.table ? or x[["row.names"]] =.. and the sort later ? # if(make.ids) { # if(id.factor) { # y <- y[ident != 1L] # better way ? y[ident!=1L] = NULL ? # nam <- names(y) # names(y) <- NULL # y <- DFDTl(.Call(C_rbindlist, y, TRUE, TRUE, id.names)) # # attributes(y[[1L]]) <- list(levels = nam, class = c("ordered", "factor")) # setattributes(y[[1L]], pairlist(levels = nam, class = c("ordered", "factor"))) # a lot faster ! # return(y) # } else return(DFDTl(.Call(C_rbindlist, y[ident != 1L], TRUE, TRUE, id.names))) # } else return(DFDTl(.Call(C_rbindlist, y[ident != 1L], TRUE, TRUE, NULL))) # } else lapply(y, ul2d) # } # # l <- ul2d(l) # if(recursive) { # while(!inherits(l, "data.frame")) l <- ul2d(l) # if(make.ids) { # nams <- attr(l, "names") # ids <- which(nams == id.names) # nid <- length(ids) # if(nid > 1L) { # make sure row.names comes after ids, even if only one id! # nids <- seq_len(nid) # attr(l, "names")[ids] <- if(length(idcols) == nid) idcols else paste(id.names, nids, sep = ".") # if(keep.row.names) { # New! It seems it lost a bit of speed through this part # rn <- which(nams == row.names) # New! # if(!all(ids == nids) || rn != nid + 1L) .Call(C_setcolorder, l, c(ids, rn, seq_along(l)[-c(ids,rn)])) # l <- l[c(ids,rn,seq_along(l)[-c(ids,rn)])] # New! efficient? could replace only rownames if one of the conditions holds # } else if (!all(ids == nids)) .Call(C_setcolorder, l, c(ids, seq_along(l)[-ids])) # l <- l[c(ids,seq_along(l)[-ids])] # Old! before row.names! # } # } else if (keep.row.names) { # New! # rn <- which(attr(l, "names") == row.names) # New! # if(rn != 1L) .Call(C_setcolorder, l, c(rn,seq_along(l)[-rn])) # l <- l[c(rn,seq_along(l)[-rn])] # New! # } # } # attr(l, ".internal.selfref") <- NULL # # setattrib(l, ".internal.selfref", NULL) # what if recursive = FALSE -> doesn't work.. but takes more time!! # return(l) # } collapse/R/flag.R0000644000176200001440000002062414174223734013324 0ustar liggesusers flag <- function(x, n = 1, ...) UseMethod("flag") # , x flag.default <- function(x, n = 1, g = NULL, t = NULL, fill = NA, stubs = TRUE, ...) { if(is.matrix(x) && !inherits(x, "matrix")) return(UseMethod("flag", unclass(x))) if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(g)) return(.Call(Cpp_flaglead,x,n,fill,0L,0L,G_t(t),stubs)) g <- G_guo(g) .Call(Cpp_flaglead,x,n,fill,g[[1L]],g[[2L]],G_t(t),stubs) } flag.pseries <- function(x, n = 1, fill = NA, stubs = TRUE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) index <- unclass(getpix(attr(x, "index"))) if(length(index) > 2L) index <- list(finteraction(index[-length(index)]), index[[length(index)]]) g <- index[[1L]] t <- index[[2L]] tlev <- attr(t, "levels") oldopts <- options(warn = -1L) on.exit(options(oldopts)) if(is.finite(as.integer(tlev[1L]))) t <- as.integer(tlev)[t] if(length(n) > 1 && is.factor(x)) x <- setNames(as.character(x), names(x)) if(is.matrix(x)) .Call(Cpp_flagleadm,x,n,fill,fnlevels(g),g,t,stubs) else .Call(Cpp_flaglead,x,n,fill,fnlevels(g),g,t,stubs) } flag.matrix <- function(x, n = 1, g = NULL, t = NULL, fill = NA, stubs = length(n) > 1L, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(g)) return(.Call(Cpp_flagleadm,x,n,fill,0L,0L,G_t(t),stubs)) g <- G_guo(g) .Call(Cpp_flagleadm,x,n,fill,g[[1L]],g[[2L]],G_t(t),stubs) } flag.grouped_df <- function(x, n = 1, t = NULL, fill = NA, stubs = length(n) > 1L, keep.ids = TRUE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) g <- GRP.grouped_df(x, call = FALSE) tsym <- all.vars(substitute(t)) nam <- attr(x, "names") gn <- which(nam %in% g[[5L]]) if(length(tsym) && !anyNA(tn <- match(tsym, nam))) { if(length(tn) == 1L) { if(any(gn == tn)) stop("timevar coincides with grouping variables!") t <- .subset2(x, tn) } else { if(any(gn %in% tn)) stop("timevar coincides with grouping variables!") t <- .subset(x, tn) } gn <- c(gn, tn) } if(length(gn)) { ax <- attributes(x) res <- .Call(Cpp_flagleadl, .subset(x, -gn), n,fill,g[[1L]],g[[2L]],G_t(t),stubs) if(keep.ids) res <- c(.subset(x, gn), res) ax[["names"]] <- names(res) # Works for multiple lags ! return(setAttributes(res, ax)) } .Call(Cpp_flagleadl,x,n,fill,g[[1L]],g[[2L]],G_t(t),stubs) } flag.data.frame <- function(x, n = 1, g = NULL, t = NULL, fill = NA, stubs = length(n) > 1L, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(g)) return(.Call(Cpp_flagleadl,x,n,fill,0L,0L,G_t(t),stubs)) g <- G_guo(g) .Call(Cpp_flagleadl,x,n,fill,g[[1L]],g[[2L]],G_t(t),stubs) } flag.list <- function(x, n = 1, g = NULL, t = NULL, fill = NA, stubs = length(n) > 1L, ...) flag.data.frame(x, n, g, t, fill, stubs, ...) flag.pdata.frame <- function(x, n = 1, fill = NA, stubs = length(n) > 1L, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) index <- unclass(getpix(attr(x, "index"))) if(length(index) > 2L) index <- list(finteraction(index[-length(index)]), index[[length(index)]]) g <- index[[1L]] t <- index[[2L]] tlev <- attr(t, "levels") oldopts <- options(warn = -1L) on.exit(options(oldopts)) if(is.finite(as.integer(tlev[1L]))) t <- as.integer(tlev)[t] .Call(Cpp_flagleadl,x,n,fill,fnlevels(g),g,t,stubs) } # Lag Operator # use xt instead of by ? L <- function(x, n = 1, ...) UseMethod("L") # , x L.default <- function(x, n = 1, g = NULL, t = NULL, fill = NA, stubs = TRUE, ...) { if(is.matrix(x) && !inherits(x, "matrix")) return(flag.matrix(x, n, g, t, fill, stubs, ...)) flag.default(x, n, g, t, fill, stubs, ...) } L.pseries <- function(x, n = 1, fill = NA, stubs = TRUE, ...) flag.pseries(x, n, fill, stubs, ...) L.matrix <- function(x, n = 1, g = NULL, t = NULL, fill = NA, stubs = TRUE, ...) flag.matrix(x, n, g, t, fill, stubs, ...) L.grouped_df <- function(x, n = 1, t = NULL, fill = NA, stubs = TRUE, keep.ids = TRUE, ...) { x <- x eval(substitute(flag.grouped_df(x, n, t, fill, stubs, keep.ids, ...))) } L.data.frame <- function(x, n = 1, by = NULL, t = NULL, cols = is.numeric, fill = NA, stubs = TRUE, keep.ids = TRUE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) if(is.call(by) || is.call(t)) { ax <- attributes(x) class(x) <- NULL nam <- names(x) if(is.call(by)) { if(length(by) == 3L) { cols <- ckmatch(all.vars(by[[2L]]), nam, "Unknown variables:") gn <- ckmatch(all.vars(by[[3L]]), nam, "Unknown variables:") } else { gn <- ckmatch(all.vars(by), nam, "Unknown variables:") cols <- if(is.null(cols)) seq_along(x)[-gn] else cols2int(cols, x, nam) } by <- G_guo(if(length(gn) == 1L) x[[gn]] else x[gn]) if(!keep.ids) gn <- NULL } else { gn <- NULL if(length(cols)) cols <- cols2int(cols, x, nam) by <- if(is.null(by)) list(0L, 0L) else G_guo(by) } if(is.call(t)) { tn <- ckmatch(all.vars(t), nam, "Unknown variables:") t1 <- length(tn) == 1L t <- if(t1) x[[tn]] else x[tn] cols <- if(is.null(cols)) seq_along(x)[-tn] else if(t1) cols[cols != tn] else fsetdiff(cols, tn) if(keep.ids) gn <- c(gn, tn) } res <- if(length(gn)) c(x[gn], .Call(Cpp_flagleadl,x[cols],n,fill,by[[1L]],by[[2L]],G_t(t),stubs)) else .Call(Cpp_flagleadl,x[cols],n,fill,by[[1L]],by[[2L]],G_t(t),stubs) ax[["names"]] <- names(res) return(setAttributes(res, ax)) } else if(length(cols)) { # Needs to be like this, otherwise subsetting dropps the attributes ! ax <- attributes(x) class(x) <- NULL x <- x[cols2int(cols, x, names(x), FALSE)] ax[["names"]] <- names(x) setattributes(x, ax) } if(is.null(by)) return(.Call(Cpp_flagleadl,x,n,fill,0L,0L,G_t(t),stubs)) by <- G_guo(by) .Call(Cpp_flagleadl,x,n,fill,by[[1L]],by[[2L]],G_t(t),stubs) } L.list <- function(x, n = 1, by = NULL, t = NULL, cols = is.numeric, fill = NA, stubs = TRUE, keep.ids = TRUE, ...) L.data.frame(x, n, by, t, cols, fill, stubs, keep.ids, ...) L.pdata.frame <- function(x, n = 1, cols = is.numeric, fill = NA, stubs = TRUE, keep.ids = TRUE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) ax <- attributes(x) nam <- ax[["names"]] index <- unclass(getpix(ax[["index"]])) if(keep.ids) { gn <- which(nam %in% names(index)) if(length(gn) && is.null(cols)) cols <- seq_along(unclass(x))[-gn] } else gn <- NULL if(length(index) > 2L) index <- list(finteraction(index[-length(index)]), index[[length(index)]]) g <- index[[1L]] t <- index[[2L]] tlev <- attr(t, "levels") oldopts <- options(warn = -1L) on.exit(options(oldopts)) if(is.finite(as.integer(tlev[1L]))) t <- as.integer(tlev)[t] if(length(cols)) cols <- cols2int(cols, x, nam, FALSE) if(length(gn) && length(cols)) { class(x) <- NULL # Works for multiple lags ! res <- c(x[gn], .Call(Cpp_flagleadl,x[cols],n,fill,fnlevels(g),g,t,stubs)) ax[["names"]] <- names(res) return(setAttributes(res, ax)) } else if(!length(gn)) # could speed up ? return(.Call(Cpp_flagleadl,fcolsubset(x, cols),n,fill,fnlevels(g),g,t,stubs)) .Call(Cpp_flagleadl,x,n,fill,fnlevels(g),g,t,stubs) } # Lead Operator F <- function(x, n = 1, ...) UseMethod("F") # , x F.default <- function(x, n = 1, g = NULL, t = NULL, fill = NA, stubs = TRUE, ...) { if(is.matrix(x) && !inherits(x, "matrix")) return(flag.matrix(x, -n, g, t, fill, stubs, ...)) flag.default(x, -n, g, t, fill, stubs, ...) } F.pseries <- function(x, n = 1, fill = NA, stubs = TRUE, ...) flag.pseries(x, -n, fill, stubs, ...) F.matrix <- function(x, n = 1, g = NULL, t = NULL, fill = NA, stubs = TRUE, ...) flag.matrix(x, -n, g, t, fill, stubs, ...) F.grouped_df <- function(x, n = 1, t = NULL, fill = NA, stubs = TRUE, keep.ids = TRUE, ...) { x <- x eval(substitute(flag.grouped_df(x, -n, t, fill, stubs, keep.ids, ...))) } F.data.frame <- function(x, n = 1, by = NULL, t = NULL, cols = is.numeric, fill = NA, stubs = TRUE, keep.ids = TRUE, ...) L.data.frame(x, -n, by, t, cols, fill, stubs, keep.ids, ...) F.list <- F.data.frame F.pdata.frame <- function(x, n = 1, cols = is.numeric, fill = NA, stubs = TRUE, keep.ids = TRUE, ...) L.pdata.frame(x, -n, cols, fill, stubs, keep.ids, ...) collapse/R/fsummarise.R0000644000176200001440000002167714174223734014577 0ustar liggesusers# Old, simple version: # fFUN_add_groups <- function(x) { # x$g <- quote(.g_) # Faster than [["g"]] # x$use.g.names <- FALSE # x # } fFUN_smr_add_groups <- function(z) { if(!is.call(z)) return(z) cz <- l1orlst(as.character(z[[1L]])) # needed if collapse::fmean etc.. if(any(cz == .FAST_STAT_FUN_POLD)) { z$g <- quote(.g_) z$use.g.names <- FALSE } # This works for nested calls (nothing more required, but need to put at the end..) if(length(z) > 2L || is.call(z[[2L]])) return(as.call(lapply(z, fFUN_smr_add_groups))) z } # Works: fFUN_smr_add_groups(quote(mean(fmax(min(fmode(mpg))))/fmean(mpg) + fsd(hp) + sum(bla) / 20)) # Old version... # othFUN_compute <- function(x) { # if(length(x) == 2L) # No additional function arguments # return(substitute(.copyMostAttributes_(unlist(lapply(gsplit(a, .g_), b), FALSE, FALSE), a), # list(a = x[[2L]], b = x[[1L]]))) # # With more arguments, things become more complex.. # lapply_call <- as.call(c(list(quote(lapply), substitute(gsplit(a, .g_), list(a = x[[2L]]))), as.list(x[-2L]))) # substitute(.copyMostAttributes_(unlist(a, FALSE, FALSE), b), # list(a = lapply_call, b = x[[2L]])) # } # Note: Need unclass here because of t_list() in do_across(), which only works if also the interior of the list is a list! smr_funi_simple <- function(i, data, .data_, funs, aplvec, ce, ...) { # return(list(i = i, data = data, .data_ = .data_, funs = funs, aplvec = aplvec, ce = ce)) .FUN_ <- funs[[i]] if(aplvec[i]) { value <- if(missing(...)) lapply(unattrib(.data_), .FUN_) else do.call(lapply, c(list(unattrib(.data_), .FUN_), eval(substitute(list(...)), data, ce)), envir = ce) names(value) <- names(.data_) } else if(any(i == .FAST_STAT_FUN_POLD)) { if(missing(...)) return(unclass(.FUN_(.data_, drop = FALSE))) fcal <- as.call(c(list(as.name(i), quote(.data_)), as.list(substitute(list(...))[-1L]))) fcal$drop <- FALSE return(unclass(eval(fcal, c(list(.data_ = .data_), data), ce))) } else { value <- if(missing(...)) .FUN_(.data_) else do.call(.FUN_, c(list(.data_), eval(substitute(list(...)), data, ce)), envir = ce) oldClass(value) <- NULL } return(value) # Check is already done at the end... # if(all_eq(vlengths(value, FALSE))) stop("All computations must result in data values of equal length") } smr_funi_grouped <- function(i, data, .data_, funs, aplvec, ce, ...) { g <- data[[".g_"]] .FUN_ <- funs[[i]] if(aplvec[i]) { value <- if(missing(...)) lapply(unattrib(.data_), copysplaplfun, g, .FUN_) else dots_apply_grouped(.data_, g, .FUN_, eval(substitute(list(...)), data, ce)) names(value) <- names(.data_) } else if(any(i == .FAST_STAT_FUN_POLD)) { if(missing(...)) return(unclass(.FUN_(.data_, g = g, use.g.names = FALSE))) fcal <- as.call(c(list(as.name(i), quote(.data_), g = quote(.g_)), as.list(substitute(list(...))[-1L]))) fcal$use.g.names <- FALSE return(unclass(eval(fcal, c(list(.data_ = .data_), data), ce))) } else { value <- dots_apply_grouped_bulk(.data_, g, .FUN_, if(missing(...)) NULL else eval(substitute(list(...)), data, ce)) value <- .Call(C_rbindlist, unclass(value), FALSE, FALSE, NULL) oldClass(value) <- NULL } return(value) # Again checks are done below } fsummarise <- function(.data, ..., keep.group_vars = TRUE) { if(!is.list(.data)) stop(".data needs to be a list of equal length columns or a data.frame") e <- substitute(list(...)) nam <- names(e) nullnam <- is.null(nam) pe <- parent.frame() cld <- oldClass(.data) # This needs to be called cld, because across fetches it from here !! if(any(cld == "grouped_df")) { oldClass(.data) <- NULL g <- GRP.grouped_df(.data, call = FALSE) attr(.data, "groups") <- NULL ax <- attributes(.data) ax[["class"]] <- fsetdiff(cld, c("GRP_df", "grouped_df")) .data[[".g_"]] <- g .data[[".gsplit_"]] <- gsplit res <- vector("list", length(e)) for(i in 2:length(e)) { # This is good and very fast ei <- e[[i]] if(nullnam || nam[i] == "") { # Across if(ei[[1L]] != quote(across) && ei[[1L]] != quote(acr)) stop("expressions need to be named or start with across(), or its shorthand acr().") ei[[1L]] <- quote(do_across) ei$.eval_funi <- quote(smr_funi_grouped) # return(eval(ei, list(do_across = do_across, smr_funi_grouped = smr_funi_grouped), pe)) res[[i]] <- eval(ei, list(do_across = do_across, smr_funi_grouped = smr_funi_grouped), pe) } else { # Tagged vector expressions eiv <- all.names(ei) res[[i]] <- list(if(any(eiv %in% .FAST_STAT_FUN_POLD)) # startsWith(eiv, .FAST_STAT_FUN_POLD) Note: startsWith does not reliably capture expressions e.g. e <- quote(list(b = fmean(log(mpg)) + max(qsec))) does not work !! eval(fFUN_smr_add_groups(ei), .data, pe) else do_grouped_expr(ei, eiv, .data, g, pe)) } } names(res) <- nam res[[1L]] <- if(keep.group_vars) g$groups else NULL res <- unlist(res, FALSE, use.names = TRUE) # replicating groups if more rows per computation... if(!all_eq(lr <- vlengths(res, FALSE))) { if(!keep.group_vars) stop("all computations need to result in vectors of equal length") gi <- seq_along(g$group.vars) ef <- lr[length(gi)+1L] / g[[1L]] if(!all_eq(lr[-gi]) || ef %% 1 > 0) stop("all computations need to result in vectors of equal length") res[gi] <- .Call(C_subsetDT, res[gi], rep(seq_len(g[[1L]]), each = ef), gi, FALSE) # Using C_subsetvector is not really faster... (1-2 microseconds gain) } } else { # Without groups... ax <- attributes(.data) oldClass(.data) <- NULL # Not strictrly needed but just to make sure execution is efficient in across etc.. if(nullnam || bsum(!nzchar(nam)) > 1L) { # Likely Across statement... for(i in 2:length(e)) { ei <- e[[i]] if(nullnam || nam[i] == "") { if(ei[[1L]] != quote(across) && ei[[1L]] != quote(acr)) stop("expressions need to be named or start with across(), or its shorthand acr().") ei[[1L]] <- quote(.do_across) ei$.eval_funi <- quote(.smr_funi_simple) e[[i]] <- ei } else e[[i]] <- as.call(list(quote(list), ei)) } # return(eval(e, c(.data, list(.do_across = do_across, .smr_funi_simple = smr_funi_simple)), pe)) res <- unlist(eval(e, c(.data, list(.do_across = do_across, .smr_funi_simple = smr_funi_simple)), pe), FALSE, use.names = TRUE) } else res <- eval(e, .data, pe) # return(res) if(!all_eq(vlengths(res, FALSE))) stop("all computations need to result in vectors of equal length") } ax[["names"]] <- names(res) ax[["row.names"]] <- .set_row_names(length(res[[1L]])) return(condalcSA(res, ax, any(cld == "data.table"))) } smr <- fsummarise # sumr # -> yes, but above is more consistent with other shortcuts # Some speed improvement before gsplit: # # othFUN_compute <- function(x) { # if(length(x) == 2L) # No additional function arguments # return(substitute(copyMostAttrib(unlist(lapply(split(a, .g_f), b), FALSE, FALSE), a), # list(a = x[[2L]], b = x[[1L]]))) # # With more arguments, things become more complex.. # lapply_call <- as.call(c(list(quote(lapply), substitute(split(a, .g_f), list(a = x[[2L]]))), as.list(x[-2L]))) # substitute(copyMostAttrib(unlist(a, FALSE, FALSE), b), # list(a = lapply_call, b = x[[2L]])) # } # # fsummarise <- function(.data, ..., keep.group_vars = TRUE) { # if(!is.list(.data)) stop(".data needs to be a list of equal length columns or a data.frame") # e <- substitute(list(...)) # cld <- oldClass(.data) # dm <- c(list(.g_ = g), .data) # ofl <- TRUE # if(any(cld == "grouped_df")) { # g <- GRP.grouped_df(.data, call = FALSE) # ax <- attributes(fungroup(.data)) # # FUNs <- vapply(e[-1L], function(x) as.character(x[[1L]]), character(1L), USE.NAMES = FALSE) # # if(any(FUNs %!in% .FAST_STAT_FUN_POLD)) ... # for(i in seq_along(e)[-1L]) { # This is good and very fast # ei <- e[[i]] # if(any(startsWith(as.character(ei[[1L]]), .FAST_STAT_FUN_POLD))) { # could pass collapse::flast.default etc.. # e[[i]] <- fFUN_add_groups(ei) # } else { # if(ofl) { # dm$.g_f <- as_factor_GRP(g) # ofl <- FALSE # } # e[[i]] <- othFUN_compute(ei) # } # } # res <- eval(e, dm, parent.frame()) # if(keep.group_vars) res <- c(g[["groups"]], res) # ax[["names"]] <- names(res) # ax[["row.names"]] <- .set_row_names(g[[1L]]) # return(condalcSA(res, ax, any(cld == "data.table"))) # } # ax <- attributes(.data) # res <- eval(e, .data, parent.frame()) # ax[["names"]] <- names(res) # ax[["row.names"]] <- 1L # return(condalcSA(res, ax, any(cld == "data.table"))) # } collapse/R/TRA.R0000644000176200001440000001223614056170537013042 0ustar liggesusers TRA <- function(x, STATS, FUN = "-", ...) UseMethod("TRA") # , x TRA.default <- function(x, STATS, FUN = "-", g = NULL, ...) { if(is.matrix(x) && !inherits(x, "matrix")) return(TRA.matrix(x, STATS, FUN, g, ...)) if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(g)) return(.Call(Cpp_TRA,x,STATS,0L,TtI(FUN))) if(is.atomic(g)) { if(is.nmfactor(g)) { if(fnlevels(g) != length(STATS)) stop("number of groups must match length(STATS)") } else { g <- qG(g, na.exclude = FALSE) # needs to be ordered to be compatible with fast functions !! if(attr(g, "N.groups") != length(STATS)) stop("number of groups must match length(STATS)") } return(.Call(Cpp_TRA,x,STATS,g,TtI(FUN))) } if(!is_GRP(g)) g <- GRP.default(g, return.groups = FALSE, call = FALSE) if(g[[1L]] != length(STATS)) stop("number of groups must match length(STATS)") .Call(Cpp_TRA,x,STATS,g[[2L]],TtI(FUN)) } TRA.matrix <- function(x, STATS, FUN = "-", g = NULL, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(g)) return(.Call(Cpp_TRAm,x,STATS,0L,TtI(FUN))) if(is.atomic(g)) { if(is.nmfactor(g)) { if(fnlevels(g) != nrow(STATS)) stop("number of groups must match nrow(STATS)") } else { g <- qG(g, na.exclude = FALSE) # needs to be ordered to be compatible with fast functions !! if(attr(g, "N.groups") != nrow(STATS)) stop("number of groups must match nrow(STATS)") } return(.Call(Cpp_TRAm,x,STATS,g,TtI(FUN))) } if(!is_GRP(g)) g <- GRP.default(g, return.groups = FALSE, call = FALSE) if(g[[1L]] != nrow(STATS)) stop("number of groups must match nrow(STATS)") .Call(Cpp_TRAm,x,STATS,g[[2L]],TtI(FUN)) } TRA.data.frame <- function(x, STATS, FUN = "-", g = NULL, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(g)) return(.Call(Cpp_TRAl,x,STATS,0L,TtI(FUN))) if(is.atomic(g)) { if(is.nmfactor(g)) { if(fnlevels(g) != fnrow2(STATS)) stop("number of groups must match nrow(STATS)") } else { g <- qG(g, na.exclude = FALSE) # needs to be ordered to be compatible with fast functions !! if(attr(g, "N.groups") != fnrow2(STATS)) stop("number of groups must match nrow(STATS)") } return(.Call(Cpp_TRAl,x,STATS,g,TtI(FUN))) } if(!is_GRP(g)) g <- GRP.default(g, return.groups = FALSE, call = FALSE) if(g[[1L]] != fnrow2(STATS)) stop("number of groups must match nrow(STATS)") .Call(Cpp_TRAl,x,STATS,g[[2L]],TtI(FUN)) } TRA.list <- function(x, STATS, FUN = "-", g = NULL, ...) TRA.data.frame(x, STATS, FUN, g, ...) TRA.grouped_df <- function(x, STATS, FUN = "-", keep.group_vars = TRUE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) g <- GRP.grouped_df(x, call = FALSE) clx <- oldClass(x) oldClass(x) <- NULL oldClass(STATS) <- NULL if(g[[1L]] != length(STATS[[1L]])) stop("number of groups must match nrow(STATS)") nognst <- names(STATS) %!in% g[[5L]] mt <- ckmatch(names(STATS), names(x), "Variables in STATS not found in x:") mt <- mt[nognst] x[mt] <- .Call(Cpp_TRAl,x[mt],STATS[nognst],g[[2L]],TtI(FUN)) if(!keep.group_vars) x[names(x) %in% g[[5L]]] <- NULL oldClass(x) <- clx x } # sourceCpp('R/C++/TRAset.cpp') # sourceCpp('R/C++/TRAsetl.cpp') # sourceCpp('R/C++/TRAseta.cpp') # # setTRA <- function(X, X_ag, g = 0L, trans = "replace", ...) { # UseMethod("setTRA", X) # } # setTRA.default <- function(X, X_ag, g = 0L, trans = "replace", ...) { # # if(!(is.atomic(X_ag) && is.null(dim(X_ag)))) stop("X_ag must be a vector") # Cpp already gives error !! matrix takes first element.. # if(is.character(trans)) trans <- match(trans,c("replace.na.fill","replace","subtract","subtract.add.avg","divide","percentage","add","multiply")) # if(is.list(g)) setTRACpp(X, X_ag, g[[2]], trans) else setTRACpp(X, X_ag, g, trans) # } # setTRA.matrix <- function(X, X_ag, g = 0L, trans = "replace", ...) { # if(!is.atomic(X_ag)) stop("X_ag must be a vector or matrix") # if(is.character(trans)) trans <- match(trans,c("replace.na.fill","replace","subtract","subtract.add.avg","divide","percentage","add","multiply")) # if(is.list(g)) setTRAmCpp(X, X_ag, g[[2]], trans) else setTRAmCpp(X, X_ag, g, trans) # } # setTRA.data.frame <- function(X, X_ag, g = 0L, trans = "replace", ...) { # if(is.array(X_ag)) stop("X_ag must be a vetor or list / data.frame") # if(is.list(X_ag) && length(X_ag[[1]]) == 1) X_ag <- unlist(X_ag, use.names = FALSE) # if(is.character(trans)) trans <- match(trans,c("replace.na.fill","replace","subtract","subtract.add.avg","divide","percentage","add","multiply")) # if(is.list(g)) setTRAlCpp(X, X_ag, g[[2]], trans) else setTRAlCpp(X, X_ag, g, trans) # } # setTRA.list <- function(X, X_ag, g = 0L, trans = "replace", ...) { # if(is.array(X_ag)) stop("X_ag must be a vetor or list / data.frame") # if(is.list(X_ag) && length(X_ag[[1]]) == 1) X_ag <- unlist(X_ag, use.names = FALSE) # if(is.character(trans)) trans <- match(trans,c("replace.na.fill","replace","subtract","subtract.add.avg","divide","percentage","add","multiply")) # if(is.list(g)) setTRAlCpp(X, X_ag, g[[2]], trans) else setTRAlCpp(X, X_ag, g, trans) # } collapse/R/varying.R0000644000176200001440000003342414174223734014074 0ustar liggesusers varying <- function(x, ...) UseMethod("varying") # , x varying.default <- function(x, g = NULL, any_group = TRUE, use.g.names = TRUE, ...) { if(is.matrix(x) && !inherits(x, "matrix")) return(varying.matrix(x, g, any_group, use.g.names, ...)) if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(g)) return(.Call(Cpp_varying,x,0L,0L,any_group)) if(is.atomic(g)) { if(use.g.names && !any_group) { if(!is.nmfactor(g)) g <- qF(g, na.exclude = FALSE) lev <- attr(g, "levels") return(`names<-`(.Call(Cpp_varying,x,length(lev),g,any_group), lev)) } if(is.nmfactor(g)) return(.Call(Cpp_varying,x,fnlevels(g),g,any_group)) g <- qG(g, na.exclude = FALSE) return(.Call(Cpp_varying,x,attr(g,"N.groups"),g,any_group)) } if(!is_GRP(g)) g <- GRP.default(g, return.groups = use.g.names && !any_group, call = FALSE) if(use.g.names && !any_group) return(`names<-`(.Call(Cpp_varying,x,g[[1L]],g[[2L]],any_group), GRPnames(g))) .Call(Cpp_varying,x,g[[1L]],g[[2L]],any_group) } varying.pseries <- function(x, effect = 1L, any_group = TRUE, use.g.names = TRUE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) g <- if(length(effect) == 1L) .subset2(getpix(attr(x, "index")), effect) else finteraction(.subset(getpix(attr(x, "index")), effect)) if(!any_group && use.g.names) { lev <- attr(g, "levels") return(`names<-`(.Call(Cpp_varying,x,length(lev),g,any_group), lev)) } .Call(Cpp_varying,x,fnlevels(g),g,any_group) } varying.matrix <- function(x, g = NULL, any_group = TRUE, use.g.names = TRUE, drop = TRUE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(g)) return(.Call(Cpp_varyingm,x,0L,0L,any_group,drop)) if(is.atomic(g)) { if(use.g.names && !any_group) { if(!is.nmfactor(g)) g <- qF(g, na.exclude = FALSE) lev <- attr(g, "levels") return(`dimnames<-`(.Call(Cpp_varyingm,x,length(lev),g,any_group,FALSE), list(lev, dimnames(x)[[2L]]))) } if(is.nmfactor(g)) return(.Call(Cpp_varyingm,x,fnlevels(g),g,any_group,drop)) g <- qG(g, na.exclude = FALSE) return(.Call(Cpp_varyingm,x,attr(g,"N.groups"),g,any_group,drop)) } if(!is_GRP(g)) g <- GRP.default(g, return.groups = use.g.names && !any_group, call = FALSE) if(use.g.names && !any_group) return(`dimnames<-`(.Call(Cpp_varyingm,x,g[[1L]],g[[2L]],any_group,FALSE), list(GRPnames(g), dimnames(x)[[2L]]))) .Call(Cpp_varyingm,x,g[[1L]],g[[2L]],any_group,drop) } varying.data.frame <- function(x, by = NULL, cols = NULL, any_group = TRUE, use.g.names = TRUE, drop = TRUE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) if(is.call(by)) { nam <- attr(x, "names") if(length(by) == 3L) { cols <- ckmatch(all.vars(by[[2L]]), nam) gn <- ckmatch(all.vars(by[[3L]]), nam) } else { gn <- ckmatch(all.vars(by), nam) cols <- if(is.null(cols)) seq_along(unclass(x))[-gn] else cols2int(cols, x, nam, FALSE) } by <- if(length(gn) == 1L) .subset2(x, gn) else GRP.default(x, gn, return.groups = use.g.names && !any_group, call = FALSE) x <- fcolsubset(x, cols) } else if(length(cols)) x <- colsubset(x, cols) if(is.null(by)) return(.Call(Cpp_varyingl,x,0L,0L,any_group,drop)) if(is.atomic(by)) { if(use.g.names && !any_group && !inherits(x, "data.table")) { if(!is.nmfactor(by)) by <- qF(by, na.exclude = FALSE) lev <- attr(by, "levels") return(setRnDF(.Call(Cpp_varyingl,x,length(lev),by,any_group,FALSE), lev)) } if(is.nmfactor(by)) return(.Call(Cpp_varyingl,x,fnlevels(by),by,any_group,drop)) by <- qG(by, na.exclude = FALSE) return(.Call(Cpp_varyingl,x,attr(by,"N.groups"),by,any_group,drop)) } if(!is_GRP(by)) by <- GRP.default(by, return.groups = use.g.names && !any_group, call = FALSE) if(use.g.names && !any_group && !inherits(x, "data.table") && length(groups <- GRPnames(by))) return(setRnDF(.Call(Cpp_varyingl,x,by[[1L]],by[[2L]],any_group,FALSE), groups)) .Call(Cpp_varyingl,x,by[[1L]],by[[2L]],any_group,drop) } varying.list <- function(x, by = NULL, cols = NULL, any_group = TRUE, use.g.names = TRUE, drop = TRUE, ...) varying.data.frame(x, by, cols, any_group, use.g.names, drop, ...) varying.pdata.frame <- function(x, effect = 1L, cols = NULL, any_group = TRUE, use.g.names = TRUE, drop = TRUE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) index <- unclass(getpix(attr(x, "index"))) g <- if(length(effect) == 1L) index[[effect]] else finteraction(index[effect]) x <- if(is.null(cols)) fcolsubset(x, attr(x, "names") %!in% names(index[effect])) else colsubset(x, cols) if(!any_group && use.g.names) { lev <- attr(g, "levels") return(setRnDF(.Call(Cpp_varyingl,x,length(lev),g,any_group,FALSE), lev)) } .Call(Cpp_varyingl,x,fnlevels(g),g,any_group,drop) } varying.grouped_df <- function(x, any_group = TRUE, use.g.names = FALSE, drop = TRUE, keep.group_vars = TRUE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) g <- GRP.grouped_df(x, call = FALSE) nam <- attr(x, "names") ngn <- nam %!in% g[[5L]] if(any_group) { if(!all(ngn)) x <- if(drop) .subset(x, ngn) else fcolsubset(x, ngn) return(.Call(Cpp_varyingl,x,g[[1L]],g[[2L]],any_group,drop)) } ax <- attributes(x) ax[["groups"]] <- NULL ax[["class"]] <- fsetdiff(ax[["class"]], c("GRP_df", "grouped_df")) ax[["row.names"]] <- if(use.g.names) GRPnames(g) else .set_row_names(g[[1L]]) if(!all(ngn)) { if(keep.group_vars) { ax[["names"]] <- c(g[[5L]], nam[ngn]) return(setAttributes(c(g[[4L]],.Call(Cpp_varyingl,.subset(x, ngn),g[[1L]],g[[2L]],FALSE,FALSE)), ax)) } ax[["names"]] <- nam[ngn] return(setAttributes(.Call(Cpp_varyingl,.subset(x, ngn),g[[1L]],g[[2L]],FALSE,FALSE), ax)) } else if(keep.group_vars) { ax[["names"]] <- c(g[[5L]], nam) return(setAttributes(c(g[[4L]],.Call(Cpp_varyingl,x,g[[1L]],g[[2L]],FALSE,FALSE)), ax)) } else return(setAttributes(.Call(Cpp_varyingl,x,g[[1L]],g[[2L]],FALSE,FALSE), ax)) } varying.sf <- function(x, by = NULL, cols = NULL, any_group = TRUE, use.g.names = TRUE, drop = TRUE, ...) { clx <- oldClass(x) oldClass(x) <- NULL x[[attr(x, "sf_column")]] <- NULL oldClass(x) <- clx[clx != "sf"] if(any(clx == "grouped_df")) return(varying.grouped_df(x, any_group, use.g.names, drop, ...)) varying.data.frame(x, by, cols, any_group, use.g.names, drop, ...) } # Previous versions: Like fast statistical functions ... # varying <- function(x, ...) UseMethod("varying") # , x # # varying.default <- function(x, g = NULL, TRA = NULL, any_group = TRUE, use.g.names = TRUE, ...) { # if(!missing(...)) unused_arg_action(match.call(), ...) # if(is.null(TRA)) { # if(is.null(g)) return(.Call(Cpp_varying,x,0L,0L,any_group)) else if(is.atomic(g)) { # if(use.g.names && !any_group) { # if(!is.nmfactor(g)) g <- qF(g, na.exclude = FALSE) # lev <- attr(g, "levels") # return(`names<-`(.Call(Cpp_varying,x,length(lev),g,any_group), lev)) # } else { # if(is.nmfactor(g)) return(.Call(Cpp_varying,x,fnlevels(g),g,any_group)) else { # g <- qG(g, na.exclude = FALSE) # return(.Call(Cpp_varying,x,attr(g,"N.groups"),g,any_group)) # } # } # } else { # if(!is_GRP(g)) g <- GRP.default(g, return.groups = use.g.names && !any_group, call = FALSE) # if(use.g.names && !any_group) return(`names<-`(.Call(Cpp_varying,x,g[[1L]],g[[2L]],any_group), GRPnames(g))) else # return(.Call(Cpp_varying,x,g[[1L]],g[[2L]],any_group)) # } # } else { # if(is.null(g)) return(.Call(Cpp_TRA,x,.Call(Cpp_varying,x,0L,0L,any_group),0L,TtI(TRA))) else if(is.atomic(g)) { # if(is.nmfactor(g)) ng <- fnlevels(g) else { # g <- qG(g, na.exclude = FALSE) # ng <- attr(g,"N.groups") # } # return(.Call(Cpp_TRA,x,.Call(Cpp_varying,x,ng,g,any_group),if(any_group) 0L else g,TtI(TRA))) # } else { # if(!is_GRP(g)) g <- GRP.default(g, return.groups = FALSE, call = FALSE) # return(.Call(Cpp_TRA,x,.Call(Cpp_varying,x,g[[1L]],g[[2L]],any_group),if(any_group) 0L else g[[2L]],TtI(TRA))) # } # } # } # # varying.matrix <- function(x, g = NULL, TRA = NULL, any_group = TRUE, use.g.names = TRUE, drop = TRUE, ...) { # if(!missing(...)) unused_arg_action(match.call(), ...) # if(is.null(TRA)) { # if(is.null(g)) return(.Call(Cpp_varyingm,x,0L,0L,any_group,drop)) else if(is.atomic(g)) { # if(use.g.names && !any_group) { # if(!is.nmfactor(g)) g <- qF(g, na.exclude = FALSE) # lev <- attr(g, "levels") # return(`dimnames<-`(.Call(Cpp_varyingm,x,length(lev),g,any_group,FALSE), list(lev, dimnames(x)[[2L]]))) # } else { # if(is.nmfactor(g)) return(.Call(Cpp_varyingm,x,fnlevels(g),g,any_group,drop)) else { # g <- qG(g, na.exclude = FALSE) # return(.Call(Cpp_varyingm,x,attr(g,"N.groups"),g,any_group,drop)) # } # } # } else { # if(!is_GRP(g)) g <- GRP.default(g, return.groups = use.g.names && !any_group, call = FALSE) # if(use.g.names && !any_group) return(`dimnames<-`(.Call(Cpp_varyingm,x,g[[1L]],g[[2L]],any_group,FALSE), list(GRPnames(g), dimnames(x)[[2L]]))) else # return(.Call(Cpp_varyingm,x,g[[1L]],g[[2L]],any_group,drop)) # } # } else { # if(is.null(g)) return(.Call(Cpp_TRAm,x,.Call(Cpp_varyingm,x,0L,0L,any_group,TRUE),0L,TtI(TRA))) else if (is.atomic(g)) { # if(is.nmfactor(g)) ng <- fnlevels(g) else { # g <- qG(g, na.exclude = FALSE) # ng <- attr(g,"N.groups") # } # return(.Call(Cpp_TRAm,x,.Call(Cpp_varyingm,x,ng,g,any_group,drop),if(any_group) 0L else g,TtI(TRA))) # } else { # if(!is_GRP(g)) g <- GRP.default(g, return.groups = FALSE, call = FALSE) # return(.Call(Cpp_TRAm,x,.Call(Cpp_varyingm,x,g[[1L]],g[[2L]],any_group,drop),if(any_group) 0L else g[[2L]],TtI(TRA))) # } # } # } # # varying.data.frame <- function(x, g = NULL, TRA = NULL, any_group = TRUE, use.g.names = TRUE, drop = TRUE, ...) { # if(!missing(...)) unused_arg_action(match.call(), ...) # if(is.null(TRA)) { # if(is.null(g)) return(.Call(Cpp_varyingl,x,0L,0L,any_group,drop)) else if(is.atomic(g)) { # if(use.g.names && !any_group && !inherits(x, "data.table")) { # if(!is.nmfactor(g)) g <- qF(g, na.exclude = FALSE) # lev <- attr(g, "levels") # return(setRnDF(.Call(Cpp_varyingl,x,length(lev),g,any_group,FALSE), lev)) # } else { # if(is.nmfactor(g)) return(.Call(Cpp_varyingl,x,fnlevels(g),g,any_group,drop)) else { # g <- qG(g, na.exclude = FALSE) # return(.Call(Cpp_varyingl,x,attr(g,"N.groups"),g,any_group,drop)) # } # } # } else { # if(!is_GRP(g)) g <- GRP.default(g, return.groups = use.g.names && !any_group, call = FALSE) # if(use.g.names && !any_group && !inherits(x, "data.table") && length(groups <- GRPnames(g))) # return(setRnDF(.Call(Cpp_varyingl,x,g[[1L]],g[[2L]],any_group,FALSE), groups)) else # return(.Call(Cpp_varyingl,x,g[[1L]],g[[2L]],any_group,drop)) # } # } else { # if(is.null(g)) return(.Call(Cpp_TRAl,x,.Call(Cpp_varyingl,x,0L,0L,any_group,TRUE),0L,TtI(TRA))) else if(is.atomic(g)) { # if(is.nmfactor(g)) ng <- fnlevels(g) else { # g <- qG(g, na.exclude = FALSE) # ng <- attr(g,"N.groups") # } # return(.Call(Cpp_TRAl,x,.Call(Cpp_varyingl,x,ng,g,any_group,drop),if(any_group) 0L else g,TtI(TRA))) # } else { # if(!is_GRP(g)) g <- GRP.default(g, return.groups = FALSE, call = FALSE) # return(.Call(Cpp_TRAl,x,.Call(Cpp_varyingl,x,g[[1L]],g[[2L]],any_group,drop),if(any_group) 0L else g[[2L]],TtI(TRA))) # } # } # } # varying.list <- function(x, g = NULL, TRA = NULL, any_group = TRUE, use.g.names = TRUE, drop = TRUE, ...) # varying.data.frame(x, g, TRA, any_group, use.g.names, drop, ...) # # # Make better version ? # varying.grouped_df <- function(x, TRA = NULL, any_group = TRUE, use.g.names = FALSE, keep.group_vars = !any_group, ...) { # if(!missing(...)) unused_arg_action(match.call(), ...) # g <- GRP.grouped_df(x, call = FALSE) # nam <- attr(x, "names") # gn <- which(nam %in% g[[5L]]) # nTRAl <- is.null(TRA) # gl <- length(gn) > 0L # if(gl || nTRAl) { # ax <- attributes(x) # attributes(x) <- NULL # if(nTRAl) { # ax[["groups"]] <- NULL # ax[["class"]] <- fsetdiff(ax[["class"]], c("GRP_df", "grouped_df")) # ax[["row.names"]] <- if(use.g.names && !any_group) GRPnames(g) else if(!any_group) .set_row_names(g[[1L]]) else 1L # if(gl) { # if(keep.group_vars) { # ax[["names"]] <- c(g[[5L]], nam[-gn]) # return(setAttributes(c(g[[4L]],.Call(Cpp_varyingl,x[-gn],g[[1L]],g[[2L]],any_group,FALSE)), ax)) # } else { # ax[["names"]] <- nam[-gn] # return(setAttributes(.Call(Cpp_varyingl,x[-gn],g[[1L]],g[[2L]],any_group,FALSE), ax)) # } # } else if(keep.group_vars) { # ax[["names"]] <- c(g[[5L]], nam) # return(setAttributes(c(g[[4L]],.Call(Cpp_varyingl,x,g[[1L]],g[[2L]],any_group,FALSE)), ax)) # } else return(setAttributes(.Call(Cpp_varyingl,x,g[[1L]],g[[2L]],any_group,FALSE), ax)) # } else if(keep.group_vars) { # ax[["names"]] <- c(nam[gn], nam[-gn]) # return(setAttributes(c(x[gn],.Call(Cpp_TRAl,x[-gn],.Call(Cpp_varyingl,x[-gn],g[[1L]],g[[2L]],any_group,FALSE),g[[2L]],TtI(TRA))), ax)) # } else { # ax[["names"]] <- nam[-gn] # return(setAttributes(.Call(Cpp_TRAl,x[-gn],.Call(Cpp_varyingl,x[-gn],g[[1L]],g[[2L]],any_group,FALSE),g[[2L]],TtI(TRA)), ax)) # } # } else return(.Call(Cpp_TRAl,x,.Call(Cpp_varyingl,x,g[[1L]],g[[2L]],any_group,FALSE),g[[2L]],TtI(TRA))) # } # collapse/R/fmode.R0000644000176200001440000001557614172367040013514 0ustar liggesusers # Note: for principal innovations of this code see fsum.R fmode <- function(x, ...) UseMethod("fmode") # , x fmode.default <- function(x, g = NULL, w = NULL, TRA = NULL, na.rm = TRUE, use.g.names = TRUE, ties = "first", ...) { if(is.matrix(x) && !inherits(x, "matrix")) return(fmode.matrix(x, g, w, TRA, na.rm, use.g.names, ties = ties, ...)) if(!missing(...)) unused_arg_action(match.call(), ...) ret <- switch(ties, first = 0L, min = 1L, max = 2L, last = 3L, stop("Unknown ties option: ", ties)) if(is.null(TRA)) { if(is.null(g)) return(.Call(Cpp_fmode,x,0L,0L,NULL,w,na.rm,ret)) if(is.atomic(g)) { if(use.g.names) { if(!is.nmfactor(g)) g <- qF(g, na.exclude = FALSE) lev <- attr(g, "levels") return(`names<-`(.Call(Cpp_fmode,x,length(lev),g,NULL,w,na.rm,ret), lev)) } if(is.nmfactor(g)) return(.Call(Cpp_fmode,x,fnlevels(g),g,NULL,w,na.rm,ret)) g <- qG(g, na.exclude = FALSE) return(.Call(Cpp_fmode,x,attr(g,"N.groups"),g,NULL,w,na.rm,ret)) } if(!is_GRP(g)) g <- GRP.default(g, return.groups = use.g.names, call = FALSE) if(use.g.names) return(`names<-`(.Call(Cpp_fmode,x,g[[1L]],g[[2L]],g[[3L]],w,na.rm,ret), GRPnames(g))) return(.Call(Cpp_fmode,x,g[[1L]],g[[2L]],g[[3L]],w,na.rm,ret)) } if(is.null(g)) return(.Call(Cpp_TRA,x,.Call(Cpp_fmode,x,0L,0L,NULL,w,na.rm,ret),0L,TtI(TRA))) g <- G_guo(g) .Call(Cpp_TRA,x,.Call(Cpp_fmode,x,g[[1L]],g[[2L]],g[[3L]],w,na.rm,ret),g[[2L]],TtI(TRA)) } fmode.matrix <- function(x, g = NULL, w = NULL, TRA = NULL, na.rm = TRUE, use.g.names = TRUE, drop = TRUE, ties = "first", ...) { if(!missing(...)) unused_arg_action(match.call(), ...) ret <- switch(ties, first = 0L, min = 1L, max = 2L, last = 3L, stop("Unknown ties option: ", ties)) if(is.null(TRA)) { if(is.null(g)) return(.Call(Cpp_fmodem,x,0L,0L,NULL,w,na.rm,drop,ret)) if(is.atomic(g)) { if(use.g.names) { if(!is.nmfactor(g)) g <- qF(g, na.exclude = FALSE) lev <- attr(g, "levels") return(`dimnames<-`(.Call(Cpp_fmodem,x,length(lev),g,NULL,w,na.rm,FALSE,ret), list(lev, dimnames(x)[[2L]]))) } if(is.nmfactor(g)) return(.Call(Cpp_fmodem,x,fnlevels(g),g,NULL,w,na.rm,FALSE,ret)) g <- qG(g, na.exclude = FALSE) return(.Call(Cpp_fmodem,x,attr(g,"N.groups"),g,NULL,w,na.rm,FALSE,ret)) } if(!is_GRP(g)) g <- GRP.default(g, return.groups = use.g.names, call = FALSE) if(use.g.names) return(`dimnames<-`(.Call(Cpp_fmodem,x,g[[1L]],g[[2L]],g[[3L]],w,na.rm,FALSE,ret), list(GRPnames(g), dimnames(x)[[2L]]))) return(.Call(Cpp_fmodem,x,g[[1L]],g[[2L]],g[[3L]],w,na.rm,FALSE,ret)) } if(is.null(g)) return(.Call(Cpp_TRAm,x,.Call(Cpp_fmodem,x,0L,0L,NULL,w,na.rm,TRUE,ret),0L,TtI(TRA))) g <- G_guo(g) .Call(Cpp_TRAm,x,.Call(Cpp_fmodem,x,g[[1L]],g[[2L]],g[[3L]],w,na.rm,FALSE,ret),g[[2L]],TtI(TRA)) } fmode.data.frame <- function(x, g = NULL, w = NULL, TRA = NULL, na.rm = TRUE, use.g.names = TRUE, drop = TRUE, ties = "first", ...) { if(!missing(...)) unused_arg_action(match.call(), ...) ret <- switch(ties, first = 0L, min = 1L, max = 2L, last = 3L, stop("Unknown ties option: ", ties)) if(is.null(TRA)) { if(is.null(g)) if(drop) return(unlist(.Call(Cpp_fmodel,x,0L,0L,NULL,w,na.rm,ret))) else return(.Call(Cpp_fmodel,x,0L,0L,NULL,w,na.rm,ret)) if(is.atomic(g)) { if(use.g.names && !inherits(x, "data.table")) { if(!is.nmfactor(g)) g <- qF(g, na.exclude = FALSE) lev <- attr(g, "levels") return(setRnDF(.Call(Cpp_fmodel,x,length(lev),g,NULL,w,na.rm,ret), lev)) } if(is.nmfactor(g)) return(.Call(Cpp_fmodel,x,fnlevels(g),g,NULL,w,na.rm,ret)) g <- qG(g, na.exclude = FALSE) return(.Call(Cpp_fmodel,x,attr(g,"N.groups"),g,NULL,w,na.rm,ret)) } if(!is_GRP(g)) g <- GRP.default(g, return.groups = use.g.names, call = FALSE) if(use.g.names && !inherits(x, "data.table") && length(groups <- GRPnames(g))) return(setRnDF(.Call(Cpp_fmodel,x,g[[1L]],g[[2L]],g[[3L]],w,na.rm,ret), groups)) return(.Call(Cpp_fmodel,x,g[[1L]],g[[2L]],g[[3L]],w,na.rm,ret)) } if(is.null(g)) return(.Call(Cpp_TRAl,x,.Call(Cpp_fmodel,x,0L,0L,NULL,w,na.rm,ret),0L,TtI(TRA))) g <- G_guo(g) .Call(Cpp_TRAl,x,.Call(Cpp_fmodel,x,g[[1L]],g[[2L]],g[[3L]],w,na.rm,ret),g[[2L]],TtI(TRA)) } fmode.list <- function(x, g = NULL, w = NULL, TRA = NULL, na.rm = TRUE, use.g.names = TRUE, drop = TRUE, ties = "first", ...) fmode.data.frame(x, g, w, TRA, na.rm, use.g.names, drop, ties, ...) fmode.grouped_df <- function(x, w = NULL, TRA = NULL, na.rm = TRUE, use.g.names = FALSE, keep.group_vars = TRUE, keep.w = TRUE, ties = "first", ...) { if(!missing(...)) unused_arg_action(match.call(), ...) ret <- switch(ties, first = 0L, min = 1L, max = 2L, last = 3L, stop("Unknown ties option: ", ties)) g <- GRP.grouped_df(x, call = FALSE) wsym <- l1orn(as.character(substitute(w)), NULL) nam <- attr(x, "names") gn2 <- gn <- which(nam %in% g[[5L]]) nTRAl <- is.null(TRA) sumw <- NULL if(length(wsym) && length(wn <- whichv(nam, wsym))) { w <- .subset2(x, wn) if(any(gn == wn)) stop("Weights coincide with grouping variables!") gn <- c(gn, wn) if(keep.w) { if(nTRAl) sumw <- `names<-`(list(fsumC(w,g[[1L]],g[[2L]],NULL,na.rm)), paste0("sum.", wsym)) else if(keep.group_vars) gn2 <- gn else sumw <- gn2 <- wn } } gl <- length(gn) > 0L if(gl || nTRAl) { ax <- attributes(x) attributes(x) <- NULL if(nTRAl) { ax[["groups"]] <- NULL ax[["class"]] <- fsetdiff(ax[["class"]], c("GRP_df", "grouped_df")) ax[["row.names"]] <- if(use.g.names) GRPnames(g) else .set_row_names(g[[1L]]) if(gl) { if(keep.group_vars) { ax[["names"]] <- c(g[[5L]], names(sumw), nam[-gn]) return(setAttributes(c(g[[4L]], sumw, .Call(Cpp_fmodel,x[-gn],g[[1L]],g[[2L]],g[[3L]],w,na.rm,ret)), ax)) } ax[["names"]] <- c(names(sumw), nam[-gn]) return(setAttributes(c(sumw, .Call(Cpp_fmodel,x[-gn],g[[1L]],g[[2L]],g[[3L]],w,na.rm,ret)), ax)) } else if(keep.group_vars) { ax[["names"]] <- c(g[[5L]], nam) return(setAttributes(c(g[[4L]], .Call(Cpp_fmodel,x,g[[1L]],g[[2L]],g[[3L]],w,na.rm,ret)), ax)) } else return(setAttributes(.Call(Cpp_fmodel,x,g[[1L]],g[[2L]],g[[3L]],w,na.rm,ret), ax)) } else if(keep.group_vars || (keep.w && length(sumw))) { ax[["names"]] <- c(nam[gn2], nam[-gn]) return(setAttributes(c(x[gn2],.Call(Cpp_TRAl,x[-gn],.Call(Cpp_fmodel,x[-gn],g[[1L]],g[[2L]],g[[3L]],w,na.rm,ret),g[[2L]],TtI(TRA))), ax)) } ax[["names"]] <- nam[-gn] return(setAttributes(.Call(Cpp_TRAl,x[-gn],.Call(Cpp_fmodel,x[-gn],g[[1L]],g[[2L]],g[[3L]],w,na.rm,ret),g[[2L]],TtI(TRA)), ax)) } else return(.Call(Cpp_TRAl,x,.Call(Cpp_fmodel,x,g[[1L]],g[[2L]],g[[3L]],w,na.rm,ret),g[[2L]],TtI(TRA))) } collapse/R/BY.R0000644000176200001440000002656314201327077012731 0ustar liggesusers BY <- function(x, ...) UseMethod("BY") BY.default <- function(x, g, FUN, ..., use.g.names = TRUE, sort = TRUE, expand.wide = FALSE, parallel = FALSE, mc.cores = 1L, return = c("same", "vector", "list")) { if(!is.atomic(x)) stop("x needs to be an atomic vector") # redundant ? if(is.matrix(x) && !inherits(x, "matrix")) return(UseMethod("BY", unclass(x))) if(!(is.function(FUN) || is.character(FUN))) stop("FUN needs to be a function") aplyfun <- if(parallel) function(...) mclapply(..., mc.cores = mc.cores) else lapply simplify <- switch(return[1L], same = 1L, vector = 2L, list = 3L, stop("BY.default only supports same, vector and list output!")) g <- GRP(g, return.groups = use.g.names, sort = sort, call = FALSE) res <- aplyfun(gsplit(x, g), FUN, ...) if(use.g.names) names(res) <- GRPnames(g, FALSE) if(simplify == 3L) return(res) if(expand.wide) return(do.call(rbind, res)) if(use.g.names) { res <- unlist(res, recursive = FALSE) if(simplify == 1L) return(copyMostAttributes(res, x)) # here needs to be copyMostAttributes... otherwise overwrites names } else { if(simplify == 1L) { res <- unlist(res, FALSE, FALSE) if(length(res) == length(x) && typeof(res) == typeof(x) && isTRUE(g$ordered[2L])) return(duplAttributes(res, x)) return(copyMostAttributes(res, x)) } # If we return a vector and do not use group names but a function like quantile(), we may still replicate the names given by that function... ll <- length(res) nr1 <- names(res[[1L]]) res <- unlist(res, FALSE, FALSE) if(length(res) != ll && length(nr1) && length(res) == length(nr1)*ll) names(res) <- rep(nr1, ll) } res } # Experimental: But not really faster and also risky because vapply checks types and types may differ... # copysplaplfun <- function(x, g, FUN, ...) { # sx <- gsplit(x, g) # if(length(sx) > 100000L && length(r1 <- match.fun(FUN)(sx[[1L]], ...)) == 1L) # return(copyMostAttributes(vapply(sx, FUN, r1, ..., USE.NAMES = FALSE), x)) # copyMostAttributes(unlist(lapply(sx, FUN, ...), FALSE, FALSE), x) # } copysplaplfun <- function(x, g, FUN, ...) copyMostAttributes(unlist(lapply(gsplit(x, g), FUN, ...), FALSE, FALSE), x) splaplfun <- function(x, g, FUN, ...) unlist(lapply(gsplit(x, g), FUN, ...), FALSE, FALSE) BY.data.frame <- function(x, g, FUN, ..., use.g.names = TRUE, sort = TRUE, expand.wide = FALSE, parallel = FALSE, mc.cores = 1L, return = c("same", "matrix", "data.frame", "list")) { if(!is.list(x)) stop("x needs to be a list") if(!(is.function(FUN) || is.character(FUN))) stop("FUN needs to be a function") aplyfun <- if(parallel) function(...) mclapply(..., mc.cores = mc.cores) else lapply return <- switch(return[1L], same = 1L, matrix = 3L, data.frame = 2L, list = 0L, stop("Unknown return option!")) g <- GRP(g, return.groups = use.g.names, sort = sort, call = FALSE) if(return != 0L) { ax <- attributes(x) if(expand.wide) { if(return < 3L) { # Return a data.frame splitfun <- function(y) .Call(Cpp_mctl, do.call(rbind, lapply(gsplit(y, g), FUN, ...)), TRUE, 0L) res <- unlist(aplyfun(x, splitfun), recursive = FALSE, use.names = TRUE) if(return == 1L) { isDTl <- inherits(x, "data.table") ax[["names"]] <- names(res) ax[["row.names"]] <- if(use.g.names && !inherits(x, "data.table") && length(gn <- GRPnames(g))) gn else .set_row_names(length(res[[1L]])) } else { isDTl <- FALSE ax <- list(names = names(res), row.names = if(use.g.names && length(gn <- GRPnames(g))) gn else .set_row_names(length(res[[1L]])), class = "data.frame") } } else { # Return a matrix attributes(x) <- NULL splitfun <- function(y) do.call(rbind, lapply(gsplit(y, g), FUN, ...)) res <- do.call(cbind, aplyfun(x, splitfun)) cn <- dimnames(res)[[2L]] namr <- rep(ax[["names"]], each = ncol(res)/length(x)) dimnames(res) <- list(if(use.g.names) GRPnames(g) else NULL, if(length(cn)) paste(namr, cn, sep = ".") else namr) return(res) } } else { # No expand wide (classical result) matl <- return == 3L isDTl <- !matl && return != 2L && inherits(x, "data.table") attributes(x) <- NULL if(return == 2L) ax <- list(names = ax[["names"]], row.names = ax[["row.names"]], class = "data.frame") if(use.g.names && !isDTl && length(gn <- GRPnames(g))) { # Using names... res <- vector("list", length(x)) res1 <- lapply(gsplit(x[[1L]], g), FUN, ...) names(res1) <- gn res[[1L]] <- unlist(res1, FALSE, TRUE) namres1 <- names(res[[1L]]) if(matl) dn <- list(namres1, ax[["names"]]) else if(length(namres1)) ax[["row.names"]] <- namres1 else if(length(res[[1L]]) != length(x[[1L]])) ax[["row.names"]] <- .set_row_names(length(res[[1L]])) if(length(namres1)) names(res[[1L]]) <- NULL if(matl) { if(length(res) > 1L) res[-1L] <- aplyfun(x[-1L], splaplfun, g, FUN, ...) res <- do.call(cbind, res) dimnames(res) <- dn return(res) } else { copyMostAttributes(res[[1L]], x[[1L]]) if(length(res) > 1L) res[-1L] <- aplyfun(x[-1L], copysplaplfun, g, FUN, ...) } } else { # Not using names... if(matl) { res <- do.call(cbind, aplyfun(x, splaplfun, g, FUN, ...)) sl <- isTRUE(g$ordered[2L]) && nrow(res) == length(x[[1L]]) if(sl) rn1 <- ax[["row.names"]][1L] dimnames(res) <- list(if(sl && length(rn1) && is.character(rn1) && rn1 != "1") ax[["row.names"]] else NULL, ax[["names"]]) return(res) } else { res <- aplyfun(x, copysplaplfun, g, FUN, ...) if(length(res[[1L]]) != length(x[[1L]]) || !isTRUE(g$ordered[2L])) ax[["row.names"]] <- .set_row_names(length(res[[1L]])) } } } return(condalcSA(res, ax, isDTl)) } if(expand.wide) return(aplyfun(x, function(y) do.call(rbind, lapply(gsplit(y, g, use.g.names), FUN, ...)))) return(aplyfun(x, function(y) lapply(gsplit(y, g, use.g.names), FUN, ...))) } BY.list <- function(x, ...) BY.data.frame(x, ...) BY.matrix <- function(x, g, FUN, ..., use.g.names = TRUE, sort = TRUE, expand.wide = FALSE, parallel = FALSE, mc.cores = 1L, return = c("same", "matrix", "data.frame", "list")) { if(!is.matrix(x)) stop("x needs to be a matrix") if(!(is.function(FUN) || is.character(FUN))) stop("FUN needs to be a function") aplyfun <- if(parallel) function(...) parallel::mclapply(..., mc.cores = mc.cores) else lapply return <- switch(return[1L], same = 3L, matrix = 2L, data.frame = 1L, list = 0L, stop("Unknown return option!")) g <- GRP(g, return.groups = use.g.names, sort = sort, call = FALSE) if(return != 0L) { if(expand.wide) { if(return == 1L) { # Return data frame splitfun <- function(y) .Call(Cpp_mctl, do.call(rbind, lapply(gsplit(y, g), FUN, ...)), TRUE, 0L) res <- unlist(aplyfun(.Call(Cpp_mctl, x, TRUE, 0L), splitfun), recursive = FALSE, use.names = TRUE) ax <- list(names = names(res), row.names = if(use.g.names && length(gn <- GRPnames(g))) gn else .set_row_names(length(res[[1L]])), class = "data.frame") } else { # Return a matrix splitfun2 <- function(y) do.call(rbind, lapply(gsplit(y, g), FUN, ...)) res <- do.call(cbind, aplyfun(.Call(Cpp_mctl, x, FALSE, 0L), splitfun2)) cn <- dimnames(res)[[2L]] namr <- rep(dimnames(x)[[2L]], each = ncol(res)/ncol(x)) dn <- list(if(use.g.names) GRPnames(g) else NULL, if(length(cn)) paste(namr, cn, sep = ".") else namr) if(return == 2L) return(`dimnames<-`(res, dn)) ax <- attributes(x) ax[["dim"]] <- dim(res) ax[["dimnames"]] <- dn } } else { if(use.g.names && length(gn <- GRPnames(g))) { res <- vector("list", ncol(x)) res1 <- lapply(gsplit(`names<-`(x[, 1L], NULL), g), FUN, ...) names(res1) <- gn res[[1L]] <- unlist(res1, FALSE, TRUE) namres1 <- names(res[[1L]]) if(length(namres1)) names(res[[1L]]) <- NULL if(length(res) > 1L) res[-1L] <- aplyfun(.Call(Cpp_mctl, x[, -1L, drop = FALSE], FALSE, 0L), splaplfun, g, FUN, ...) if(return > 1L) { # Return a matrix res <- do.call(cbind, res) dn <- list(namres1, dimnames(x)[[2L]]) if(return == 2L) return(`dimnames<-`(res, dn)) ax <- attributes(x) ax[["dim"]] <- dim(res) ax[["dimnames"]] <- dn } else { # Return a data frame ax <- list(names = dimnames(x)[[2L]], row.names = if(length(namres1)) namres1 else .set_row_names(length(res[[1L]])), class = "data.frame") } } else { res <- aplyfun(.Call(Cpp_mctl, x, TRUE, 0L), splaplfun, g, FUN, ...) if(return > 1L) { # Return a matrix res <- do.call(cbind, res) if(return == 2L) return(res) ax <- attributes(x) if(length(dimnames(x)[[1L]]) && !(isTRUE(g$ordered[2L]) && nrow(res) == nrow(x))) { ax[["dimnames"]][1L] <- list(NULL) ax[["dim"]] <- dim(res) } } else { # Return a data frame lr1 <- length(res[[1L]]) ax <- list(names = names(res), row.names = if(lr1 == nrow(x) && length(rn <- dimnames(x)[[1L]]) && isTRUE(g$ordered[2L])) rn else .set_row_names(lr1), class = "data.frame") } } } return(setAttributes(res, ax)) } if(expand.wide) return(aplyfun(.Call(Cpp_mctl, x, TRUE, 0L), function(y) do.call(rbind, lapply(gsplit(y, g, use.g.names), FUN, ...)))) return(aplyfun(.Call(Cpp_mctl, x, TRUE, 0L), function(y) lapply(gsplit(y, g, use.g.names), FUN, ...))) } BY.grouped_df <- function(x, FUN, ..., keep.group_vars = TRUE, use.g.names = FALSE) { g <- GRP.grouped_df(x, call = FALSE) gn <- which(attr(x, "names") %in% g[[5L]]) # if(!length(gn)) { # if(!isTRUE(g$ordered[2L])) return(BY.data.frame(fungroup(x), g, FUN, ..., use.g.names = use.g.names)) # res <- BY.data.frame(x, g, FUN, ..., use.g.names = use.g.names) # if(!is.data.frame(res) || fnrow2(res) == fnrow2(x)) return(res) else return(fungroup(res)) # } res <- BY.data.frame(if(length(gn)) fcolsubset(x, -gn) else x, g, FUN, ..., use.g.names = use.g.names) if(!is.data.frame(res)) return(res) nrr <- fnrow2(res) same_size <- nrr == fnrow2(x) if(!keep.group_vars) return(if(same_size && isTRUE(g$ordered[2L])) res else fungroup(res)) if(!((same_size && isTRUE(g$ordered[2L])) || nrr == g[[1L]])) return(fungroup(res)) if(same_size) { ar <- attributes(res) ar[["names"]] <- c(g[[5L]], ar[["names"]]) return(condalcSA(c(.subset(x, gn), res), ar, any(ar$class == "data.table"))) } ar <- attributes(fungroup2(res, oldClass(res))) attributes(res) <- NULL ar[["names"]] <- c(g[[5L]], ar[["names"]]) condalcSA(c(g[[4L]], res), ar, any(ar$class == "data.table")) } collapse/R/fsum.R0000644000176200001440000001373314172367040013365 0ustar liggesusers fsum <- function(x, ...) UseMethod("fsum") # , x fsum.default <- function(x, g = NULL, w = NULL, TRA = NULL, na.rm = TRUE, use.g.names = TRUE, ...) { if(is.matrix(x) && !inherits(x, "matrix")) return(fsum.matrix(x, g, w, TRA, na.rm, use.g.names, ...)) if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(TRA)) { if(is.null(g)) return(.Call(C_fsum,x,0L,0L,w,na.rm)) if(is.atomic(g)) { if(use.g.names) { if(!is.nmfactor(g)) g <- qF(g, na.exclude = FALSE) lev <- attr(g, "levels") return(`names<-`(.Call(C_fsum,x,length(lev),g,w,na.rm), lev)) } if(is.nmfactor(g)) return(.Call(C_fsum,x,fnlevels(g),g,w,na.rm)) g <- qG(g, na.exclude = FALSE) return(.Call(C_fsum,x,attr(g,"N.groups"),g,w,na.rm)) } if(!is_GRP(g)) g <- GRP.default(g, return.groups = use.g.names, call = FALSE) if(use.g.names) return(`names<-`(.Call(C_fsum,x,g[[1L]],g[[2L]],w,na.rm), GRPnames(g))) return(.Call(C_fsum,x,g[[1L]],g[[2L]],w,na.rm)) } if(is.null(g)) return(.Call(Cpp_TRA,x,.Call(C_fsum,x,0L,0L,w,na.rm),0L,TtI(TRA))) g <- G_guo(g) .Call(Cpp_TRA,x,.Call(C_fsum,x,g[[1L]],g[[2L]],w,na.rm),g[[2L]],TtI(TRA)) } fsum.matrix <- function(x, g = NULL, w = NULL, TRA = NULL, na.rm = TRUE, use.g.names = TRUE, drop = TRUE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(TRA)) { if(is.null(g)) return(.Call(C_fsumm,x,0L,0L,w,na.rm,drop)) if(is.atomic(g)) { if(use.g.names) { if(!is.nmfactor(g)) g <- qF(g, na.exclude = FALSE) lev <- attr(g, "levels") return(`dimnames<-`(.Call(C_fsumm,x,length(lev),g,w,na.rm,FALSE), list(lev, dimnames(x)[[2L]]))) } if(is.nmfactor(g)) return(.Call(C_fsumm,x,fnlevels(g),g,w,na.rm,FALSE)) g <- qG(g, na.exclude = FALSE) return(.Call(C_fsumm,x,attr(g,"N.groups"),g,w,na.rm,FALSE)) } if(!is_GRP(g)) g <- GRP.default(g, return.groups = use.g.names, call = FALSE) if(use.g.names) return(`dimnames<-`(.Call(C_fsumm,x,g[[1L]],g[[2L]],w,na.rm,FALSE), list(GRPnames(g), dimnames(x)[[2L]]))) return(.Call(C_fsumm,x,g[[1L]],g[[2L]],w,na.rm,FALSE)) } if(is.null(g)) return(.Call(Cpp_TRAm,x,.Call(C_fsumm,x,0L,0L,w,na.rm,TRUE),0L,TtI(TRA))) g <- G_guo(g) .Call(Cpp_TRAm,x,.Call(C_fsumm,x,g[[1L]],g[[2L]],w,na.rm,FALSE),g[[2L]],TtI(TRA)) } fsum.data.frame <- function(x, g = NULL, w = NULL, TRA = NULL, na.rm = TRUE, use.g.names = TRUE, drop = TRUE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(TRA)) { if(is.null(g)) return(.Call(C_fsuml,x,0L,0L,w,na.rm,drop)) if(is.atomic(g)) { if(use.g.names && !inherits(x, "data.table")) { if(!is.nmfactor(g)) g <- qF(g, na.exclude = FALSE) lev <- attr(g, "levels") return(setRnDF(.Call(C_fsuml,x,length(lev),g,w,na.rm,FALSE), lev)) } if(is.nmfactor(g)) return(.Call(C_fsuml,x,fnlevels(g),g,w,na.rm,FALSE)) g <- qG(g, na.exclude = FALSE) return(.Call(C_fsuml,x,attr(g,"N.groups"),g,w,na.rm,FALSE)) } if(!is_GRP(g)) g <- GRP.default(g, return.groups = use.g.names, call = FALSE) if(use.g.names && !inherits(x, "data.table") && length(groups <- GRPnames(g))) return(setRnDF(.Call(C_fsuml,x,g[[1L]],g[[2L]],w,na.rm,FALSE), groups)) return(.Call(C_fsuml,x,g[[1L]],g[[2L]],w,na.rm,FALSE)) } if(is.null(g)) return(.Call(Cpp_TRAl,x,.Call(C_fsuml,x,0L,0L,w,na.rm,TRUE),0L,TtI(TRA))) g <- G_guo(g) .Call(Cpp_TRAl,x,.Call(C_fsuml,x,g[[1L]],g[[2L]],w,na.rm,FALSE),g[[2L]],TtI(TRA)) } fsum.list <- function(x, g = NULL, w = NULL, TRA = NULL, na.rm = TRUE, use.g.names = TRUE, drop = TRUE, ...) fsum.data.frame(x, g, w, TRA, na.rm, use.g.names, drop, ...) fsum.grouped_df <- function(x, w = NULL, TRA = NULL, na.rm = TRUE, use.g.names = FALSE, keep.group_vars = TRUE, keep.w = TRUE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) g <- GRP.grouped_df(x, call = FALSE) wsym <- l1orn(as.character(substitute(w)), NULL) nam <- attr(x, "names") gn2 <- gn <- which(nam %in% g[[5L]]) nTRAl <- is.null(TRA) sumw <- NULL if(length(wsym) && length(wn <- whichv(nam, wsym))) { w <- .subset2(x, wn) # faster using unclass? if(any(gn == wn)) stop("Weights coincide with grouping variables!") gn <- c(gn, wn) if(keep.w) { if(nTRAl) sumw <- `names<-`(list(fsumC(w,g[[1L]],g[[2L]],NULL,na.rm)), paste0("sum.", wsym)) else if(keep.group_vars) gn2 <- gn else sumw <- gn2 <- wn } } gl <- length(gn) > 0L # necessary here, not before ! if(gl || nTRAl) { ax <- attributes(x) attributes(x) <- NULL if(nTRAl) { ax[["groups"]] <- NULL ax[["class"]] <- fsetdiff(ax[["class"]], c("GRP_df", "grouped_df")) ax[["row.names"]] <- if(use.g.names) GRPnames(g) else .set_row_names(g[[1L]]) if(gl) { if(keep.group_vars) { ax[["names"]] <- c(g[[5L]], names(sumw), nam[-gn]) return(setAttributes(c(g[[4L]], sumw, .Call(C_fsuml,x[-gn],g[[1L]],g[[2L]],w,na.rm,FALSE)), ax)) } ax[["names"]] <- c(names(sumw), nam[-gn]) return(setAttributes(c(sumw, .Call(C_fsuml,x[-gn],g[[1L]],g[[2L]],w,na.rm,FALSE)), ax)) } else if(keep.group_vars) { ax[["names"]] <- c(g[[5L]], nam) return(setAttributes(c(g[[4L]], .Call(C_fsuml,x,g[[1L]],g[[2L]],w,na.rm,FALSE)), ax)) } else return(setAttributes(.Call(C_fsuml,x,g[[1L]],g[[2L]],w,na.rm,FALSE), ax)) } else if(keep.group_vars || (keep.w && length(sumw))) { ax[["names"]] <- c(nam[gn2], nam[-gn]) return(setAttributes(c(x[gn2],.Call(Cpp_TRAl,x[-gn],.Call(C_fsuml,x[-gn],g[[1L]],g[[2L]],w,na.rm,FALSE),g[[2L]],TtI(TRA))), ax)) } ax[["names"]] <- nam[-gn] return(setAttributes(.Call(Cpp_TRAl,x[-gn],.Call(C_fsuml,x[-gn],g[[1L]],g[[2L]],w,na.rm,FALSE),g[[2L]],TtI(TRA)), ax)) } else return(.Call(Cpp_TRAl,x,.Call(C_fsuml,x,g[[1L]],g[[2L]],w,na.rm,FALSE),g[[2L]],TtI(TRA))) } collapse/R/psacf.R0000644000176200001440000003702514174223734013512 0ustar liggesusers # TODO: could use source code of C_acf and adjust for panel: https://github.com/SurajGupta/r-source/blob/a28e609e72ed7c47f6ddfbb86c85279a0750f0b7/src/library/stats/src/filter.c psacf <- function(x, ...) UseMethod("psacf") # , x psacf.default <- function(x, g, t = NULL, lag.max = NULL, type = c("correlation", "covariance","partial"), plot = TRUE, gscale = TRUE, ...) { if(!is.numeric(x)) stop("'x' must be a numeric vector") typei <- switch(type[1L], correlation = 1L, covariance = 2L, partial = 3L, stop("Unknown type!")) series <- l1orlst(as.character(substitute(x))) g <- G_guo(g) if(is.null(lag.max)) lag.max <- round(2*sqrt(length(x)/g[[1L]])) if(gscale) x <- fscaleCpp(x,g[[1L]],g[[2L]]) acf <- if(typei == 2L) cov(x, .Call(Cpp_flaglead,x,0:lag.max,NA,g[[1L]],g[[2L]],G_t(t),FALSE), use = "pairwise.complete.obs") else c(1, cov(x, .Call(Cpp_flaglead,x,seq_len(lag.max),NA,g[[1L]],g[[2L]],G_t(t),FALSE), use = "pairwise.complete.obs")/fvar.default(x)) # or complete obs ? d <- c(lag.max+1,1,1) if(typei == 3L) { acf <- .Call(C_pacf1, array(acf, d), lag.max) lag <- array(seq_len(d[1]), c(lag.max,1,1)) } else { dim(acf) <- d lag <- array(0:lag.max, d) } acf.out <- `oldClass<-`(list(acf = acf, type = type[1L], n.used = length(x), lag = lag, series = series, snames = NULL), "acf") if(plot) { plot(acf.out, ylab = if(typei == 3L) "Panel Series Partial ACF" else "Panel Series ACF", ...) invisible(acf.out) } else { if(!missing(...)) unused_arg_action(match.call(), ...) return(acf.out) } } psacf.data.frame <- function(x, by, t = NULL, cols = is.numeric, lag.max = NULL, type = c("correlation", "covariance","partial"), plot = TRUE, gscale = TRUE, ...) { typei <- switch(type[1L], correlation = 1L, covariance = 2L, partial = 3L, stop("Unknown type!")) series <- l1orlst(as.character(substitute(x))) oldClass(x) <- NULL if(is.call(by)) { # best way ? nam <- names(x) if(length(by) == 3L) { v <- ckmatch(all.vars(by[[2L]]), nam) by <- ckmatch(all.vars(by[[3L]]), nam) } else { by <- ckmatch(all.vars(by), nam) v <- if(is.null(cols)) seq_along(x)[-by] else fsetdiff(cols2int(cols, x, nam), by) } by <- if(length(by) == 1L) x[[by]] else x[by] if(is.call(t)) { # If time-variable supplied t <- ckmatch(all.vars(t), nam, "Unknown time variable:") v <- fsetdiff(v, t) t <- if(length(t) == 1L) x[[t]] else x[t] } x <- x[v] } else if(length(cols)) x <- x[cols2int(cols, x, names(x), FALSE)] lx <- length(x) nrx <- length(x[[1L]]) snames <- names(x) attributes(x) <- NULL # already class is 0... Necessary ? getacf <- function(ng, g) { if(length(t)) t <- G_t(t) if(gscale) x <- fscalelCpp(x,ng,g) acf <- array(numeric(0), c(lag.max+1, lx, lx)) fun <- if(typei == 2L) cov else function(x, y, ...) cov(x, y, ...)/fvar.default(x) # cor for(i in seq_len(lx)) { xim <- .Call(Cpp_flaglead,x[[i]],0:lag.max,NA,ng,g,t,FALSE) for(j in seq_len(lx)) acf[ , j, i] <- fun(x[[j]], xim, use = "pairwise.complete.obs") # correct ! } acf } by <- G_guo(by) if(is.null(lag.max)) lag.max <- round(2*sqrt(nrx/by[[1L]])) acf <- getacf(by[[1L]], by[[2L]]) lag <- matrix(1, lx, lx) lag[lower.tri(lag)] <- -1 if(typei == 3L) { zvec <- double((1L+lag.max)*lx*lx) z <- .C(C_multi_yw, aperm(acf, 3:1), as.integer(nrx), as.integer(lag.max), as.integer(lx), coefs = zvec, pacf = zvec, var = zvec, aic = double(1L+lag.max), order = 0L, 1L) acf <- aperm(array(z$pacf, dim = c(lx, lx, lag.max + 1L)), 3:1)[-1L, , , drop = FALSE] } acf.out <- `oldClass<-`(list(acf = acf, type = type[1L], n.used = nrx, lag = if(typei == 3L) outer(1L:lag.max, lag) else outer(0L:lag.max, lag), series = series, snames = snames), "acf") if(plot) { plot(acf.out, ylab = if(typei == 3L) "Panel Series Partial ACF" else "Panel Series ACF", mar = if(lx > 2) c(3, 2.4, 2, 0.8) else par("mar"), ...) invisible(acf.out) } else { if(!missing(...)) unused_arg_action(match.call(), ...) return(acf.out) } } psacf.pseries <- function(x, lag.max = NULL, type = c("correlation", "covariance","partial"), plot = TRUE, gscale = TRUE, ...) { if(!is.numeric(x)) stop("'x' must be a numeric pseries ") index <- unclass(getpix(attr(x, "index"))) if(length(index) > 2L) index <- list(finteraction(index[-length(index)]), index[[length(index)]]) g <- index[[1L]] t <- index[[2L]] tlev <- attr(t, "levels") oldopts <- options(warn = -1L) on.exit(options(oldopts)) if(is.finite(as.integer(tlev[1L]))) t <- as.integer(tlev)[t] ng <- fnlevels(g) typei <- switch(type[1L], correlation = 1L, covariance = 2L, partial = 3L, stop("Unknown type!")) series <- l1orlst(as.character(substitute(x))) # faster ? if(is.null(lag.max)) lag.max <- round(2*sqrt(length(x)/ng)) if(gscale) x <- fscaleCpp(x,ng,g) acf <- if(typei == 2L) cov(x, .Call(Cpp_flaglead,x,0:lag.max,NA,ng,g,t,FALSE), use = "pairwise.complete.obs") else c(1, cov(x, .Call(Cpp_flaglead,x,seq_len(lag.max),NA,ng,g,t,FALSE), use = "pairwise.complete.obs")/fvar.default(x)) # or complete obs ? d <- c(lag.max+1,1,1) if(typei == 3L) { acf <- .Call(C_pacf1, array(acf, d), lag.max) lag <- array(seq_len(d[1]), c(lag.max,1,1)) } else { dim(acf) <- d lag <- array(0:lag.max, d) } acf.out <- `oldClass<-`(list(acf = acf, type = type[1L], n.used = length(x), lag = lag, series = series, snames = NULL), "acf") if (plot) { plot(acf.out, ylab = if(typei == 3L) "Panel Series Partial ACF" else "Panel Series ACF", ...) invisible(acf.out) } else { if(!missing(...)) unused_arg_action(match.call(), ...) return(acf.out) } } psacf.pdata.frame <- function(x, cols = is.numeric, lag.max = NULL, type = c("correlation", "covariance","partial"), plot = TRUE, gscale = TRUE, ...) { typei <- switch(type[1L], correlation = 1L, covariance = 2L, partial = 3L, stop("Unknown type!")) series <- l1orlst(as.character(substitute(x))) # faster solution ? index <- unclass(getpix(attr(x, "index"))) oldClass(x) <- NULL nrx <- length(x[[1L]]) if(length(cols)) x <- x[cols2int(cols, x, names(x), FALSE)] lx <- length(x) snames <- names(x) if(length(index) > 2L) index <- list(finteraction(index[-length(index)]), index[[length(index)]]) g <- index[[1L]] t <- index[[2L]] tlev <- attr(t, "levels") oldopts <- options(warn = -1L) on.exit(options(oldopts)) if(is.finite(as.integer(tlev[1L]))) t <- as.integer(tlev)[t] ng <- fnlevels(g) attributes(x) <- NULL # necessary after unclass above ? if(is.null(lag.max)) lag.max <- round(2*sqrt(nrx/ng)) if(gscale) x <- fscalelCpp(x,ng,g) acf <- array(numeric(0), c(lag.max+1, lx, lx)) fun <- if(typei == 2L) cov else function(x, y, ...) cov(x, y, ...)/fvar.default(x) # cor for(i in seq_len(lx)) { xim <- .Call(Cpp_flaglead,x[[i]],0:lag.max,NA,ng,g,t,FALSE) for(j in seq_len(lx)) acf[ , j, i] <- fun(x[[j]], xim, use = "pairwise.complete.obs") # correct ! } lag <- matrix(1, lx, lx) lag[lower.tri(lag)] <- -1 if(typei == 3L) { zvec <- double((1L+lag.max)*lx*lx) z <- .C(C_multi_yw, aperm(acf, 3:1), as.integer(nrx), as.integer(lag.max), as.integer(lx), coefs = zvec, pacf = zvec, var = zvec, aic = double(1L+lag.max), order = 0L, 1L) acf <- aperm(array(z$pacf, dim = c(lx, lx, lag.max + 1L)), 3:1)[-1L, , , drop = FALSE] } acf.out <- `oldClass<-`(list(acf = acf, type = type[1L], n.used = nrx, lag = if(typei == 3L) outer(1L:lag.max, lag) else outer(0L:lag.max, lag), series = series, snames = snames), "acf") if(plot) { plot(acf.out, ylab = if(typei == 3L) "Panel Series Partial ACF" else "Panel Series ACF", mar = if(lx > 2) c(3, 2.4, 2, 0.8) else par("mar"), ...) invisible(acf.out) } else { if(!missing(...)) unused_arg_action(match.call(), ...) return(acf.out) } } pspacf <- function(x, ...) UseMethod("pspacf") # , x pspacf.default <- function(x, g, t = NULL, lag.max = NULL, plot = TRUE, gscale = TRUE, ...) { if(plot) psacf.default(x, g, t, lag.max, "partial", plot, gscale, main = paste0("Series ",l1orlst(as.character(substitute(x)))), ...) else psacf.default(x, g, t, lag.max, "partial", plot, gscale, ...) } pspacf.pseries <- function(x, lag.max = NULL, plot = TRUE, gscale = TRUE, ...) { if(plot) psacf.pseries(x, lag.max, "partial", plot, gscale, main = paste0("Series ",l1orlst(as.character(substitute(x)))), ...) else psacf.pseries(x, lag.max, "partial", plot, gscale, ...) } pspacf.data.frame <- function(x, by, t = NULL, cols = is.numeric, lag.max = NULL, plot = TRUE, gscale = TRUE, ...) { psacf.data.frame(x, by, t, cols, lag.max, "partial", plot, gscale, ...) } pspacf.pdata.frame <- function(x, cols = is.numeric, lag.max = NULL, plot = TRUE, gscale = TRUE, ...) { psacf.pdata.frame(x, cols, lag.max, "partial", plot, gscale, ...) } psccf <- function(x, y, ...) UseMethod("psccf") # , x psccf.default <- function(x, y, g, t = NULL, lag.max = NULL, type = c("correlation", "covariance"), plot = TRUE, gscale = TRUE, ...) { if(!is.numeric(x)) stop("'x' must be a numeric vector") if(!is.numeric(y)) stop("'y' must be a numeric vector") lx <- length(x) if(lx != length(y)) stop("length(x) must be equal to length(y)") typei <- switch(type[1L], correlation = 1L, covariance = 2L, partial = 3L, stop("Unknown type!")) snames <- paste(c(l1orlst(as.character(substitute(x))), l1orlst(as.character(substitute(x)))), collapse = " & ") getccf <- function(ng, g) { if(length(t)) t <- G_t(t) if(gscale) { x <- fscaleCpp(x,ng,g) y <- fscaleCpp(y,ng,g) } if(typei == 2L) drop(cov(x, .Call(Cpp_flaglead,y,-lag.max:lag.max,NA,ng,g,t,FALSE), use = "pairwise.complete.obs")) else drop(cov(x, .Call(Cpp_flaglead,y,-lag.max:lag.max,NA,ng,g,t,FALSE), use = "pairwise.complete.obs")/(fsd.default(x)*fsd.default(y))) # or complete obs ? } g <- G_guo(g) if(is.null(lag.max)) lag.max <- round(2*sqrt(lx/g[[1L]])) acf <- getccf(g[[1L]], g[[2L]]) d <- c(2*lag.max+1,1,1) dim(acf) <- d acf.out <- `oldClass<-`(list(acf = acf, type = type[1L], n.used = lx, lag = array(-lag.max:lag.max, d), series = snames, snames = snames), "acf") if (plot) { plot(acf.out, ylab = "Panel Series CCF", ...) invisible(acf.out) } else { if(!missing(...)) unused_arg_action(match.call(), ...) return(acf.out) } } psccf.pseries <- function(x, y, lag.max = NULL, type = c("correlation", "covariance"), plot = TRUE, gscale = TRUE, ...) { if(!is.numeric(x)) stop("'x' must be a numeric pseries") if(!is.numeric(y) || !inherits(y, "pseries")) stop("'y' must be a numeric pseries") lx <- length(x) if(lx != length(y)) stop("length(x) must be equal to length(y)") index <- getpix(attr(x, "index")) if(!identical(index, getpix(attr(y, "index")))) stop("index of x and y differs") oldClass(index) <- NULL if(length(index) > 2L) index <- list(finteraction(index[-length(index)]), index[[length(index)]]) g <- index[[1L]] t <- index[[2L]] tlev <- attr(t, "levels") oldopts <- options(warn = -1L) on.exit(options(oldopts)) if(is.finite(as.integer(tlev[1L]))) t <- as.integer(tlev)[t] ng <- fnlevels(g) typei <- switch(type[1L], correlation = 1L, covariance = 2L, partial = 3L, stop("Unknown type!")) snames <- paste(c(l1orlst(as.character(substitute(x))), l1orlst(as.character(substitute(x)))), collapse = " & ") if (gscale) { x <- fscaleCpp(x,ng,g) y <- fscaleCpp(y,ng,g) } if (is.null(lag.max)) lag.max <- round(2*sqrt(length(x)/ng)) l_seq <- -lag.max:lag.max acf <- if(typei == 2L) drop(cov(x, .Call(Cpp_flaglead,y,l_seq,NA,ng,g,t,FALSE), use = "pairwise.complete.obs")) else drop(cov(x, .Call(Cpp_flaglead,y,l_seq,NA,ng,g,t,FALSE), use = "pairwise.complete.obs")/(fsd.default(x)*fsd.default(y))) # or complete obs ? d <- c(2*lag.max+1,1,1) dim(acf) <- d acf.out <- `oldClass<-`(list(acf = acf, type = type[1L], n.used = lx, lag = array(l_seq, d), series = snames, snames = snames), "acf") if (plot) { plot(acf.out, ylab = "Panel Series CCF", ...) invisible(acf.out) } else { if(!missing(...)) unused_arg_action(match.call(), ...) return(acf.out) } } # could do AR models also : # psar.data.frame <- function (x, aic = TRUE, order.max = lag.max, na.action = na.fail, # demean = TRUE, series = NULL, var.method = 1L, ...) # { # if (is.null(series)) # series <- l1orlst(as.character(substitute(x))) # if (ists <- is.ts(x)) # xtsp <- tsp(x) # x <- na.action(as.ts(x)) # if (anyNA(x)) # stop("NAs in 'x'") # if (ists) # xtsp <- tsp(x) # xfreq <- frequency(x) # x <- as.matrix(x) # nser <- ncol(x) # n.used <- nrow(x) # if (demean) { # x.mean <- colMeans(x) # x <- sweep(x, 2L, x.mean, check.margin = FALSE) # } # else x.mean <- rep(0, nser) # order.max <- if (is.null(order.max)) # floor(10 * log10(n.used)) # else floor(order.max) # if (order.max < 1L) # stop("'order.max' must be >= 1") # xacf <- acf(x, type = "cov", plot = FALSE, lag.max = order.max)$acf # z <- .C(stats:::C_"multi_yw", # aperm(xacf, 3:1), # as.integer(n.used), # as.integer(order.max), # as.integer(nser), # coefs = double((1L +order.max) * nser * nser), # pacf = double((1L + order.max) * nser * nser), # var = double((1L + order.max) * nser * nser), # aic = double(1L + order.max), # order = integer(1L), # as.integer(aic)) # partialacf <- aperm(array(z$pacf, dim = c(nser, nser, order.max + # 1L)), 3:1)[-1L, , , drop = FALSE] # var.pred <- aperm(array(z$var, dim = c(nser, nser, order.max + # 1L)), 3:1) # xaic <- setNames(z$aic - bmin(z$aic), 0:order.max) # order <- z$order # resid <- x # if (order > 0) { # ar <- -aperm(array(z$coefs, dim = c(nser, nser, order.max + # 1L)), 3:1)[2L:(order + 1L), , , drop = FALSE] # for (i in 1L:order) resid[-(1L:order), ] <- resid[-(1L:order), # ] - x[(order - i + 1L):(n.used - i), ] %*% t(ar[i, # , ]) # resid[1L:order, ] <- NA # } # else ar <- array(dim = c(0, nser, nser)) # var.pred <- var.pred[order + 1L, , , drop = TRUE] * n.used/(n.used - # nser * (demean + order)) # if (ists) { # attr(resid, "tsp") <- xtsp # attr(resid, "class") <- c("mts", "ts") # } # snames <- colnames(x) # colnames(resid) <- snames # dimnames(ar) <- list(seq_len(order), snames, snames) # dimnames(var.pred) <- list(snames, snames) # dimnames(partialacf) <- list(1L:order.max, snames, snames) # res <- list(order = order, ar = ar, var.pred = var.pred, # x.mean = x.mean, aic = xaic, n.used = n.used, order.max = order.max, # partialacf = partialacf, resid = resid, method = "Yule-Walker", # series = series, frequency = xfreq, call = match.call()) # oldClass(res) <- "ar" # return(res) # } collapse/R/fprod.R0000644000176200001440000001411314172367040013516 0ustar liggesusers # For foundational changes to this code see fsum.R fprod <- function(x, ...) UseMethod("fprod") # , x fprod.default <- function(x, g = NULL, w = NULL, TRA = NULL, na.rm = TRUE, use.g.names = TRUE, ...) { if(is.matrix(x) && !inherits(x, "matrix")) return(fprod.matrix(x, g, w, TRA, na.rm, use.g.names, ...)) if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(TRA)) { if(is.null(g)) return(.Call(Cpp_fprod,x,0L,0L,w,na.rm)) if(is.atomic(g)) { if(use.g.names) { if(!is.nmfactor(g)) g <- qF(g, na.exclude = FALSE) lev <- attr(g, "levels") return(`names<-`(.Call(Cpp_fprod,x,length(lev),g,w,na.rm), lev)) } if(is.nmfactor(g)) return(.Call(Cpp_fprod,x,fnlevels(g),g,w,na.rm)) g <- qG(g, na.exclude = FALSE) return(.Call(Cpp_fprod,x,attr(g,"N.groups"),g,w,na.rm)) } if(!is_GRP(g)) g <- GRP.default(g, return.groups = use.g.names, call = FALSE) if(use.g.names) return(`names<-`(.Call(Cpp_fprod,x,g[[1L]],g[[2L]],w,na.rm), GRPnames(g))) return(.Call(Cpp_fprod,x,g[[1L]],g[[2L]],w,na.rm)) } if(is.null(g)) return(.Call(Cpp_TRA,x,.Call(Cpp_fprod,x,0L,0L,w,na.rm),0L,TtI(TRA))) g <- G_guo(g) .Call(Cpp_TRA,x,.Call(Cpp_fprod,x,g[[1L]],g[[2L]],w,na.rm),g[[2L]],TtI(TRA)) } fprod.matrix <- function(x, g = NULL, w = NULL, TRA = NULL, na.rm = TRUE, use.g.names = TRUE, drop = TRUE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(TRA)) { if(is.null(g)) return(.Call(Cpp_fprodm,x,0L,0L,w,na.rm,drop)) if(is.atomic(g)) { if(use.g.names) { if(!is.nmfactor(g)) g <- qF(g, na.exclude = FALSE) lev <- attr(g, "levels") return(`dimnames<-`(.Call(Cpp_fprodm,x,length(lev),g,w,na.rm,FALSE), list(lev, dimnames(x)[[2L]]))) } if(is.nmfactor(g)) return(.Call(Cpp_fprodm,x,fnlevels(g),g,w,na.rm,FALSE)) g <- qG(g, na.exclude = FALSE) return(.Call(Cpp_fprodm,x,attr(g,"N.groups"),g,w,na.rm,FALSE)) } if(!is_GRP(g)) g <- GRP.default(g, return.groups = use.g.names, call = FALSE) if(use.g.names) return(`dimnames<-`(.Call(Cpp_fprodm,x,g[[1L]],g[[2L]],w,na.rm,FALSE), list(GRPnames(g), dimnames(x)[[2L]]))) return(.Call(Cpp_fprodm,x,g[[1L]],g[[2L]],w,na.rm,FALSE)) } if(is.null(g)) return(.Call(Cpp_TRAm,x,.Call(Cpp_fprodm,x,0L,0L,w,na.rm,TRUE),0L,TtI(TRA))) g <- G_guo(g) .Call(Cpp_TRAm,x,.Call(Cpp_fprodm,x,g[[1L]],g[[2L]],w,na.rm,FALSE),g[[2L]],TtI(TRA)) } fprod.data.frame <- function(x, g = NULL, w = NULL, TRA = NULL, na.rm = TRUE, use.g.names = TRUE, drop = TRUE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(TRA)) { if(is.null(g)) return(.Call(Cpp_fprodl,x,0L,0L,w,na.rm,drop)) if(is.atomic(g)) { if(use.g.names && !inherits(x, "data.table")) { if(!is.nmfactor(g)) g <- qF(g, na.exclude = FALSE) lev <- attr(g, "levels") return(setRnDF(.Call(Cpp_fprodl,x,length(lev),g,w,na.rm,FALSE), lev)) } if(is.nmfactor(g)) return(.Call(Cpp_fprodl,x,fnlevels(g),g,w,na.rm,FALSE)) g <- qG(g, na.exclude = FALSE) return(.Call(Cpp_fprodl,x,attr(g,"N.groups"),g,w,na.rm,FALSE)) } if(!is_GRP(g)) g <- GRP.default(g, return.groups = use.g.names, call = FALSE) if(use.g.names && !inherits(x, "data.table") && length(groups <- GRPnames(g))) return(setRnDF(.Call(Cpp_fprodl,x,g[[1L]],g[[2L]],w,na.rm,FALSE), groups)) return(.Call(Cpp_fprodl,x,g[[1L]],g[[2L]],w,na.rm,FALSE)) } if(is.null(g)) return(.Call(Cpp_TRAl,x,.Call(Cpp_fprodl,x,0L,0L,w,na.rm,TRUE),0L,TtI(TRA))) g <- G_guo(g) .Call(Cpp_TRAl,x,.Call(Cpp_fprodl,x,g[[1L]],g[[2L]],w,na.rm,FALSE),g[[2L]],TtI(TRA)) } fprod.list <- function(x, g = NULL, w = NULL, TRA = NULL, na.rm = TRUE, use.g.names = TRUE, drop = TRUE, ...) fprod.data.frame(x, g, w, TRA, na.rm, use.g.names, drop, ...) fprod.grouped_df <- function(x, w = NULL, TRA = NULL, na.rm = TRUE, use.g.names = FALSE, keep.group_vars = TRUE, keep.w = TRUE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) g <- GRP.grouped_df(x, call = FALSE) wsym <- l1orn(as.character(substitute(w)), NULL) nam <- attr(x, "names") gn2 <- gn <- which(nam %in% g[[5L]]) nTRAl <- is.null(TRA) prodw <- NULL if(length(wsym) && length(wn <- whichv(nam, wsym))) { w <- .subset2(x, wn) if(any(gn == wn)) stop("Weights coincide with grouping variables!") gn <- c(gn, wn) if(keep.w) { if(nTRAl) prodw <- `names<-`(list(fprodCpp(w,g[[1L]],g[[2L]],NULL,na.rm)), paste0("prod.", wsym)) else if(keep.group_vars) gn2 <- gn else prodw <- gn2 <- wn } } gl <- length(gn) > 0L if(gl || nTRAl) { ax <- attributes(x) attributes(x) <- NULL if(nTRAl) { ax[["groups"]] <- NULL ax[["class"]] <- fsetdiff(ax[["class"]], c("GRP_df", "grouped_df")) ax[["row.names"]] <- if(use.g.names) GRPnames(g) else .set_row_names(g[[1L]]) if(gl) { if(keep.group_vars) { ax[["names"]] <- c(g[[5L]], names(prodw), nam[-gn]) return(setAttributes(c(g[[4L]], prodw, .Call(Cpp_fprodl,x[-gn],g[[1L]],g[[2L]],w,na.rm,FALSE)), ax)) } ax[["names"]] <- c(names(prodw), nam[-gn]) return(setAttributes(c(prodw, .Call(Cpp_fprodl,x[-gn],g[[1L]],g[[2L]],w,na.rm,FALSE)), ax)) } else if(keep.group_vars) { ax[["names"]] <- c(g[[5L]], nam) return(setAttributes(c(g[[4L]], .Call(Cpp_fprodl,x,g[[1L]],g[[2L]],w,na.rm,FALSE)), ax)) } else return(setAttributes(.Call(Cpp_fprodl,x,g[[1L]],g[[2L]],w,na.rm,FALSE), ax)) } else if(keep.group_vars || (keep.w && length(prodw))) { ax[["names"]] <- c(nam[gn2], nam[-gn]) return(setAttributes(c(x[gn2],.Call(Cpp_TRAl,x[-gn],.Call(Cpp_fprodl,x[-gn],g[[1L]],g[[2L]],w,na.rm,FALSE),g[[2L]],TtI(TRA))), ax)) } ax[["names"]] <- nam[-gn] return(setAttributes(.Call(Cpp_TRAl,x[-gn],.Call(Cpp_fprodl,x[-gn],g[[1L]],g[[2L]],w,na.rm,FALSE),g[[2L]],TtI(TRA)), ax)) } else return(.Call(Cpp_TRAl,x,.Call(Cpp_fprodl,x,g[[1L]],g[[2L]],w,na.rm,FALSE),g[[2L]],TtI(TRA))) } collapse/R/pwcor_pwcov_pwnobs.R0000644000176200001440000003051414201327077016346 0ustar liggesusers# sumcc <- function(x, y) bsum(complete.cases(x,y)) # pwnobs <- function(x) qM(dapply(x, function(y) dapply(x, sumcc, y))) pwnobs <- function(X) { if(is.atomic(X) && is.matrix(X)) return(.Call(Cpp_pwnobsm, X)) # cn <- dimnames(X)[[2L]] # X <- mctl(X) if(!is.list(X)) stop("X must be a matrix or data.frame!") # -> if unequal length will warn below !! dg <- fnobs.data.frame(X) oldClass(X) <- NULL n <- length(X) nr <- length(X[[1L]]) N.mat <- diag(dg) for (i in 1:(n - 1L)) { miss <- is.na(X[[i]]) # faster than complete.cases, also for large data ! // subsetting X[[j]] faster ?? -> NOPE ! for (j in (i + 1L):n) N.mat[i, j] <- N.mat[j, i] <- nr - bsum(miss | is.na(X[[j]])) # bsum(complete.cases(X[[i]], X[[j]])) } dimnames(N.mat) <- list(names(dg), names(dg)) N.mat } pwNobs <- function(X) { message("Note that 'pwNobs' was renamed to 'pwnobs'. It will not be removed anytime soon, but please use updated function names in new code, see help('collapse-renamed')") pwnobs(X) } # corr.p <- function(r, n) { # if (n < 3L) return(1) # df <- n - 2L # t <- sqrt(df) * r/sqrt(1 - r^2) # return(2 * bmin(pt(t, df), pt(t, df, lower.tail = FALSE))) # taken from corr.test # } corr.pmat <- function(cm, nm) { df <- nm - 2L acm <- abs(cm) diag(acm) <- NA_real_ # tiny bit faster here vs below.. `attributes<-`(2 * pt(sqrt(df) * acm/sqrt(1 - acm^2), df, lower.tail = FALSE), attributes(cm)) # n <- ncol(cm) # p.mat <- matrix(NA, n, n, dimnames = dimnames(cm)) # for (i in 1:(n - 1)) { # for (j in (i + 1):n) { # p.mat[i, j] <- p.mat[j, i] <- corr.p(cm[i, j], nm[i, j]) # } # } # p.mat } complpwnobs <- function(X) { # if(is.list(X)) { # Not needed anymore because now always coercing to matrix... # n <- length(unclass(X)) # coln <- attr(X, "names") # } else { n <- ncol(X) coln <- dimnames(X)[[2L]] # } matrix(bsum(complete.cases(X)), n, n, dimnames = list(coln, coln)) } # Test: # all.equal(Hmisc::rcorr(qM(mtcars))$P, corr.pmat(r, n)) namat <- function(X) { nc <- dim(X)[2L] cn <- dimnames(X)[[2L]] mat <- rep(NA_real_, nc * nc) dim(mat) <- c(nc, nc) diag(mat) <- 1 dimnames(mat) <- list(cn, cn) mat } nmat <- function(n, X) { nc <- dim(X)[2L] cn <- dimnames(X)[[2L]] mat <- rep(n, nc * nc) dim(mat) <- c(nc, nc) dimnames(mat) <- list(cn, cn) mat } # Check speed of it ... # Also check weighted cor p-value against lm() with weights -> Good !! # -> This is good # all.equal(unattrib(cov.wt(mtcars, w, cor = TRUE)$cor), unattrib(pwcor(mtcars, w = w))) # all.equal(unattrib(cov.wt(mtcars, w, cor = TRUE)$cor), unattrib(pwcor(mtcars, w = w, use = "complete.obs"))) # all.equal(pwcor(mtcars, w = w), pwcor(mtcars, w = w, use = "complete.obs")) pwcor <- function(X, ..., w = NULL, N = FALSE, P = FALSE, array = TRUE, use = "pairwise.complete.obs") { if(is.list(X)) X <- do.call(cbind, X) lcc <- FALSE if(is.null(w)) r <- cor(X, ..., use = use) else if(use == "pairwise.complete.obs") r <- getenvFUN("weights_wtd.cors")(X, ..., weight = w) else { if(!missing(...)) stop("y is currently not supported with weighted correlations and use != 'pairwise.complete.obs'") cc <- which(complete.cases(X, w)) lcc <- length(cc) if(use == "all.obs" && lcc != length(w)) stop("missing observations in cov/cor") if(lcc) { if(lcc != length(w)) { X <- X[cc, , drop = FALSE] w <- w[cc] } r <- cov2cor(crossprod(sqrt(w) * BWmCpp(X, w = w, narm = FALSE))) # all.equal(cov2cor(crossprod(sqrt(w) * BWmCpp(X, w = w, narm = FALSE))), weights::wtd.cors(X, weight = w)) } else r <- switch(use, complete.obs = stop("no complete element pairs"), namat(X)) } if(!(N || P)) return(`oldClass<-`(r, c("pwcor", "matrix"))) n <- if(lcc) nmat(lcc, X) else switch(use, pairwise.complete.obs = pwnobs(X), complpwnobs(X)) # TODO: what about weights paiwrise ? # what if using ... to supply y ??? if(N) { res <- if(P) list(r = r, N = n, P = corr.pmat(r, n)) else list(r = r, N = n) } else res <- list(r = r, P = corr.pmat(r, n)) if(array) { res <- fsimplify2array(res) oldClass(res) <- c("pwcor","array","table") } else oldClass(res) <- "pwcor" res } # Not all equal... # all.equal(unattrib(cov.wt(mtcars, w)$cov), unattrib(pwcov(mtcars, w = w))) # all.equal(unattrib(cov.wt(mtcars, w)$cov), unattrib(pwcov(mtcars, w = w, use = "complete.obs"))) # all.equal(pwcov(mtcars, w = w), pwcov(mtcars, w = w, use = "complete.obs")) -> Yes ! pwcov <- function(X, ..., w = NULL, N = FALSE, P = FALSE, array = TRUE, use = "pairwise.complete.obs") { if(is.list(X)) X <- do.call(cbind, X) lcc <- FALSE if(is.null(w)) r <- cov(X, ..., use = use) else if(use == "pairwise.complete.obs") { r <- getenvFUN("weights_wtd.cors")(X, ..., weight = w) # sw <- bsum(w, na.rm = TRUE) Xsd <- fsd(X, w = w) # * (sw-1) / (1 - bsum((w/sw)^2)) # cov.wt, method = "unbiased" ??? r <- if(missing(...)) r * outer(Xsd, Xsd) else r * outer(Xsd, fsd(..., w = w)) } else { if(!missing(...)) stop("y is currently not supported with weighted correlations and use != 'pairwise.complete.obs'") cc <- which(complete.cases(X, w)) lcc <- length(cc) if(use == "all.obs" && lcc != length(w)) stop("missing observations in cov/cor") if(lcc) { if(lcc != length(w)) { X <- X[cc, , drop = FALSE] w <- w[cc] } r <- crossprod(sqrt(w) * BWmCpp(X, w = w, narm = FALSE)) / (bsum(w) - 1) # Check numeric accuracy ! # w <- w/bsum(w) # same method as cov.wt, method = "unbiased" # r <- crossprod(sqrt(w) * BWmCpp(X, w = w, narm = FALSE)) / (1 - bsum(w^2)) } else r <- switch(use, complete.obs = stop("no complete element pairs"), namat(X)) # namat correct ?? } if(!(N || P)) return(`oldClass<-`(r, c("pwcov", "matrix"))) n <- if(lcc) nmat(lcc, X) else switch(use, pairwise.complete.obs = pwnobs(X), complpwnobs(X)) # TODO: what about weights paiwrise ? if(N) { # good ??? // cov(X) / outer(fsd(X), fsd(X)) res <- if(P) list(cov = r, N = n, P = corr.pmat(cov2cor(r), n)) else list(cov = r, N = n) # what about x and y here ?? } else res <- list(cov = r, P = corr.pmat(cov2cor(r), n)) if(array) { res <- fsimplify2array(res) oldClass(res) <- c("pwcov","array","table") } else oldClass(res) <- "pwcov" res } print.pwcor <- function(x, digits = 2L, sig.level = 0.05, show = c("all","lower.tri","upper.tri"), spacing = 1L, return = FALSE, ...) { formfun <- function(x, dg1 = FALSE) { xx <- format(round(x, digits)) # , digits = digits-1 xx <- sub("(-?)0\\.", "\\1.", xx) if(dg1) { dgx <- diag(xx) new1 <- paste0(c(" 1", rep(" ",digits-1)), collapse = "") if(!all(st <- startsWith(dgx, " 1") | startsWith(dgx, "1"))) { # can have positive or negative values... dgx[st] <- new1 diag(xx) <- dgx } else diag(xx) <- new1 } else { xna <- is.na(x) xx[xna] <- "" xpos <- x >= 1 & !xna xx[xpos] <- sub(paste0(c(".", rep("0",digits)), collapse = ""), "", xx[xpos]) # Problem: Deletes .00 also.. } return(xx) } show <- switch(show[1L], all = 1L, lower.tri = 2L, upper.tri = 3L, stop("Unknown 'show' option")) se <- "Allowed spacing options are 0, 1 and 2!" if(is.array(x)) { sc <- TRUE d <- dim(x) ld <- length(d) if(ld > 2L) { dn <- dimnames(x) d3 <- dn[[3L]] if(all(d3 %in% c("r","N","P"))) { if(length(d3) == 3L) { sig <- matrix(" ", d[1L], d[2L]) sig[x[,, 3L] <= sig.level] <- "*" res <- sprintf(switch(spacing+1L, "%s%s(%i)", "%s%s (%i)", " %s%s (%i)", stop(se)), formfun(x[,, 1L], TRUE), sig, x[,, 2L]) # paste0(formfun(x[,, 1L]),sig,"(",x[,, 2L],")") } else if(d3[2L] == "P") { sig <- matrix(" ", d[1L], d[2L]) sig[x[,, 2L] <= sig.level] <- "*" res <- sprintf(switch(spacing+1L, "%s%s", " %s%s", " %s %s", stop(se)), formfun(x[,, 1L], TRUE), sig) } else res <- sprintf(switch(spacing+1L, "%s(%i)", "%s (%i)", " %s (%i)", stop(se)), formfun(x[,, 1L], TRUE), x[,, 2L]) } else { sc <- FALSE res <- duplAttributes(switch(spacing+1L, formfun(x), sprintf(" %s",formfun(x)), sprintf(" %s",formfun(x)), stop(se)), x) # remove this before publishing !!! } if(sc) attributes(res) <- list(dim = d[1:2], dimnames = dn[1:2]) } else res <- if(spacing == 0L) formfun(x, TRUE) else duplAttributes(sprintf(switch(spacing," %s"," %s",stop(se)), formfun(x, TRUE)), x) if(sc && show != 1L) if(show == 2L) res[upper.tri(res)] <- "" else res[lower.tri(res)] <- "" } else if(is.list(x)) { if(spacing == 0L) res <- lapply(x, formfun) else { ff <- function(i) duplAttributes(sprintf(switch(spacing," %s"," %s",stop(se)),formfun(i)), i) res <- lapply(x, ff) } if(show != 1L) res <- if(show == 2L) lapply(res, function(i){i[upper.tri(i)] <- ""; i}) else lapply(res, function(i){i[lower.tri(i)] <- ""; i}) } else res <- formfun(x) if(return) return(unclass(res)) print.default(unclass(res), quote = FALSE, right = TRUE, ...) invisible(x) } #print.table(dapply(round(x, digits), function(j) sub("^(-?)0.", "\\1.", j)), right = TRUE, ...) # print.table(, right = TRUE) print.pwcov <- function(x, digits = 2L, sig.level = 0.05, show = c("all","lower.tri","upper.tri"), spacing = 1L, return = FALSE, ...) { formfun <- function(x, adj = FALSE) { xx <- format(round(x, digits), digits = 9, big.mark = "'", big.interval = 6) # xx <- sub("(-?)0\\.", "\\1.", xx) # Not needed here... if(adj) { xna <- is.na(x) xx[xna] <- "" xpos <- x >= 1 & !xna xx[xpos] <- sub(paste0(c(".", rep("0",digits)), collapse = ""), "", xx[xpos]) # Problem: Deletes .00 also.. } return(xx) } show <- switch(show[1L], all = 1L, lower.tri = 2L, upper.tri = 3L, stop("Unknown 'show' option")) se <- "Allowed spacing options are 0, 1 and 2!" if(is.array(x)) { sc <- TRUE d <- dim(x) ld <- length(d) if(ld > 2L) { dn <- dimnames(x) d3 <- dn[[3L]] if(all(d3 %in% c("cov","N","P"))) { if(length(d3) == 3L) { sig <- matrix(" ", d[1L], d[2L]) sig[x[,, 3L] <= sig.level] <- "*" res <- sprintf(switch(spacing+1L, "%s%s(%i)", "%s%s (%i)", " %s%s (%i)", stop(se)), formfun(x[,, 1L]), sig, x[,, 2L]) # paste0(formfun(x[,, 1L]),sig,"(",x[,, 2L],")") } else if(d3[2L] == "P") { sig <- matrix(" ", d[1L], d[2L]) sig[x[,, 2L] <= sig.level] <- "*" res <- sprintf(switch(spacing+1L, "%s%s", " %s%s", " %s %s", stop(se)), formfun(x[,, 1L]), sig) } else res <- sprintf(switch(spacing+1L, "%s(%i)", "%s (%i)", " %s (%i)", stop(se)), formfun(x[,, 1L]), x[,, 2L]) } else { sc <- FALSE res <- duplAttributes(switch(spacing+1L, formfun(x, TRUE), sprintf(" %s",formfun(x, TRUE)), sprintf(" %s",formfun(x, TRUE)), stop(se)), x) # remove this before publishing !!! } if(sc) attributes(res) <- list(dim = d[1:2], dimnames = dn[1:2]) } else res <- if(spacing == 0L) formfun(x) else duplAttributes(sprintf(switch(spacing," %s"," %s",stop(se)), formfun(x)), x) if(sc && show != 1L) if(show == 2L) res[upper.tri(res)] <- "" else res[lower.tri(res)] <- "" } else if(is.list(x)) { if(spacing == 0L) res <- lapply(x, formfun, TRUE) else { ff <- function(i) duplAttributes(sprintf(switch(spacing," %s"," %s",stop(se)),formfun(i, TRUE)), i) res <- lapply(x, ff) } if(show != 1L) res <- if(show == 2L) lapply(res, function(i){i[upper.tri(i)] <- ""; i}) else lapply(res, function(i){i[lower.tri(i)] <- ""; i}) } else res <- formfun(x) if(return) return(unclass(res)) print.default(unclass(res), quote = FALSE, right = TRUE, ...) invisible(x) } #print.table(dapply(round(x, digits), function(j) sub("^(-?)0.", "\\1.", j)), right = TRUE, ...) # print.table(, right = TRUE) # print.pwcov <- function(x, digits = 2, ...) print.default(formatC(round(x, digits), format = "g", # digits = 9, big.mark = "'", big.interval = 6), quote = FALSE, right = TRUE, ...) `[.pwcor` <- `[.pwcov` <- function(x, i, j, ..., drop = TRUE) `oldClass<-`(NextMethod(), oldClass(x)) collapse/R/flast.R0000644000176200001440000001246414172367040013524 0ustar liggesusers # Note: for foundational changes to this code see fsum.R flast <- function(x, ...) UseMethod("flast") # , x flast.default <- function(x, g = NULL, TRA = NULL, na.rm = TRUE, use.g.names = TRUE, ...) { if(is.matrix(x) && !inherits(x, "matrix")) return(flast.matrix(x, g, TRA, na.rm, use.g.names, ...)) if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(TRA)) { if(is.null(g)) return(.Call(C_flast,x,0L,0L,na.rm)) if(is.atomic(g)) { if(use.g.names) { if(!is.nmfactor(g)) g <- qF(g, na.exclude = FALSE) lev <- attr(g, "levels") return(`names<-`(.Call(C_flast,x,length(lev),g,na.rm), lev)) } if(is.nmfactor(g)) return(.Call(C_flast,x,fnlevels(g),g,na.rm)) g <- qG(g, na.exclude = FALSE) return(.Call(C_flast,x,attr(g,"N.groups"),g,na.rm)) } if(!is_GRP(g)) g <- GRP.default(g, return.groups = use.g.names, call = FALSE) if(use.g.names) return(`names<-`(.Call(C_flast,x,g[[1L]],g[[2L]],na.rm), GRPnames(g))) return(.Call(C_flast,x,g[[1L]],g[[2L]],na.rm)) } if(is.null(g)) return(.Call(Cpp_TRA,x,.Call(C_flast,x,0L,0L,na.rm),0L,TtI(TRA))) g <- G_guo(g) .Call(Cpp_TRA,x,.Call(C_flast,x,g[[1L]],g[[2L]],na.rm),g[[2L]],TtI(TRA)) } flast.matrix <- function(x, g = NULL, TRA = NULL, na.rm = TRUE, use.g.names = TRUE, drop = TRUE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(TRA)) { if(is.null(g)) return(.Call(C_flastm,x,0L,0L,na.rm,drop)) if(is.atomic(g)) { if(use.g.names) { if(!is.nmfactor(g)) g <- qF(g, na.exclude = FALSE) lev <- attr(g, "levels") return(`dimnames<-`(.Call(C_flastm,x,length(lev),g,na.rm,FALSE), list(lev, dimnames(x)[[2L]]))) } if(is.nmfactor(g)) return(.Call(C_flastm,x,fnlevels(g),g,na.rm,FALSE)) g <- qG(g, na.exclude = FALSE) return(.Call(C_flastm,x,attr(g,"N.groups"),g,na.rm,FALSE)) } if(!is_GRP(g)) g <- GRP.default(g, return.groups = use.g.names, call = FALSE) if(use.g.names) return(`dimnames<-`(.Call(C_flastm,x,g[[1L]],g[[2L]],na.rm,FALSE), list(GRPnames(g), dimnames(x)[[2L]]))) return(.Call(C_flastm,x,g[[1L]],g[[2L]],na.rm,FALSE)) } if(is.null(g)) return(.Call(Cpp_TRAm,x,.Call(C_flastm,x,0L,0L,na.rm,TRUE),0L,TtI(TRA))) g <- G_guo(g) .Call(Cpp_TRAm,x,.Call(C_flastm,x,g[[1L]],g[[2L]],na.rm,FALSE),g[[2L]],TtI(TRA)) } flast.data.frame <- function(x, g = NULL, TRA = NULL, na.rm = TRUE, use.g.names = TRUE, drop = TRUE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(TRA)) { if(is.null(g)) if(drop) return(unlist(.Call(C_flastl,x,0L,0L,na.rm))) else return(.Call(C_flastl,x,0L,0L,na.rm)) if(is.atomic(g)) { if(use.g.names && !inherits(x, "data.table")) { if(!is.nmfactor(g)) g <- qF(g, na.exclude = FALSE) lev <- attr(g, "levels") return(setRnDF(.Call(C_flastl,x,length(lev),g,na.rm), lev)) } if(is.nmfactor(g)) return(.Call(C_flastl,x,fnlevels(g),g,na.rm)) g <- qG(g, na.exclude = FALSE) return(.Call(C_flastl,x,attr(g,"N.groups"),g,na.rm)) } if(!is_GRP(g)) g <- GRP.default(g, return.groups = use.g.names, call = FALSE) if(use.g.names && !inherits(x, "data.table") && length(groups <- GRPnames(g))) return(setRnDF(.Call(C_flastl,x,g[[1L]],g[[2L]],na.rm), groups)) return(.Call(C_flastl,x,g[[1L]],g[[2L]],na.rm)) } if(is.null(g)) return(.Call(Cpp_TRAl,x,.Call(C_flastl,x,0L,0L,na.rm),0L,TtI(TRA))) g <- G_guo(g) .Call(Cpp_TRAl,x,.Call(C_flastl,x,g[[1L]],g[[2L]],na.rm),g[[2L]],TtI(TRA)) } flast.list <- function(x, g = NULL, TRA = NULL, na.rm = TRUE, use.g.names = TRUE, drop = TRUE, ...) flast.data.frame(x, g, TRA, na.rm, use.g.names, drop, ...) flast.grouped_df <- function(x, TRA = NULL, na.rm = TRUE, use.g.names = FALSE, keep.group_vars = TRUE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) g <- GRP.grouped_df(x, call = FALSE) nam <- attr(x, "names") gn <- which(nam %in% g[[5L]]) nTRAl <- is.null(TRA) gl <- length(gn) > 0L if(gl || nTRAl) { ax <- attributes(x) attributes(x) <- NULL if(nTRAl) { ax[["groups"]] <- NULL ax[["class"]] <- fsetdiff(ax[["class"]], c("GRP_df", "grouped_df")) ax[["row.names"]] <- if(use.g.names) GRPnames(g) else .set_row_names(g[[1L]]) if(gl) { if(keep.group_vars) { ax[["names"]] <- c(g[[5L]], nam[-gn]) return(setAttributes(c(g[[4L]],.Call(C_flastl,x[-gn],g[[1L]],g[[2L]],na.rm)), ax)) } ax[["names"]] <- nam[-gn] return(setAttributes(.Call(C_flastl,x[-gn],g[[1L]],g[[2L]],na.rm), ax)) } else if(keep.group_vars) { ax[["names"]] <- c(g[[5L]], nam) return(setAttributes(c(g[[4L]],.Call(C_flastl,x,g[[1L]],g[[2L]],na.rm)), ax)) } else return(setAttributes(.Call(C_flastl,x,g[[1L]],g[[2L]],na.rm), ax)) } else if(keep.group_vars) { ax[["names"]] <- c(nam[gn], nam[-gn]) return(setAttributes(c(x[gn],.Call(Cpp_TRAl,x[-gn],.Call(C_flastl,x[-gn],g[[1L]],g[[2L]],na.rm),g[[2L]],TtI(TRA))), ax)) } ax[["names"]] <- nam[-gn] return(setAttributes(.Call(Cpp_TRAl,x[-gn],.Call(C_flastl,x[-gn],g[[1L]],g[[2L]],na.rm),g[[2L]],TtI(TRA)), ax)) } else return(.Call(Cpp_TRAl,x,.Call(C_flastl,x,g[[1L]],g[[2L]],na.rm),g[[2L]],TtI(TRA))) } collapse/R/psmat.R0000644000176200001440000001572714174223734013547 0ustar liggesusers psmat <- function(x, ...) UseMethod("psmat") # , x psmat.default <- function(x, g, t = NULL, transpose = FALSE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) if(is.matrix(x)) stop("x is already a matrix") if(is.atomic(g) && length(g) == 1L) { if(transpose) matrix(x, ncol = round(g), dimnames = list(seq_len(length(x)/round(g)), paste0("GRP.",seq_len(g)))) else matrix(x, nrow = round(g), byrow = TRUE, dimnames = list(paste0("GRP.",seq_len(g)), seq_len(length(x)/round(g)))) } else { if(!is.nmfactor(g)) if(is.atomic(g)) g <- qF(g, na.exclude = FALSE) else if(is_GRP(g)) g <- as_factor_GRP(g) else g <- as_factor_GRP(GRP.default(g, call = FALSE)) if(is.null(t)) { # message("No timevar provided: Assuming Balanced Panel") return(.Call(Cpp_psmat,x, g, NULL, transpose)) } else { if(!is.nmfactor(t)) if(is.atomic(t)) t <- qF(t, na.exclude = FALSE) else if(is_GRP(t)) t <- as_factor_GRP(t) else t <- as_factor_GRP(GRP.default(t, call = FALSE)) return(.Call(Cpp_psmat,x, g, t, transpose)) } } } psmat.data.frame <- function(x, by, t = NULL, cols = NULL, transpose = FALSE, array = TRUE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) oldClass(x) <- NULL # Setting globally ! if(is.atomic(by) && length(by) == 1L) { nr <- length(x[[1L]]) n <- round(by) if(length(cols)) x <- x[cols2int(cols, x, names(x), FALSE)] if(transpose) { dn <- list(seq_len(nr/n), paste0("GRP.",seq_len(by))) res <- lapply(x, matrix, ncol = n, dimnames = dn) } else { dn <- list(paste0("GRP.",seq_len(by)), seq_len(nr/n)) res <- lapply(x, matrix, nrow = n, byrow = TRUE, dimnames = dn) } } else { if(is.call(by)) { nam <- names(x) if(length(by) == 3L) { v <- ckmatch(all.vars(by[[2L]]), nam) by <- ckmatch(all.vars(by[[3L]]), nam) } else { by <- ckmatch(all.vars(by), nam) v <- if(is.null(cols)) seq_along(x)[-by] else fsetdiff(cols2int(cols, x, nam), by) } by <- if(length(by) == 1L) x[[by]] else GRP.default(x, by, call = FALSE) if(is.call(t)) { # If time-variable supplied ! t <- ckmatch(all.vars(t), nam) v <- fsetdiff(v, t) t <- if(length(t) == 1L) x[[t]] else GRP.default(x, t, call = FALSE) } x <- x[v] } else if(length(cols)) x <- x[cols2int(cols, x, names(x), FALSE)] if(!is.nmfactor(by)) if(is.atomic(by)) by <- qF(by, na.exclude = FALSE) else if(is_GRP(by)) by <- as_factor_GRP(by) else by <- as_factor_GRP(GRP.default(by, call = FALSE)) if(is.null(t)) { # message("No timevar provided: Assuming Balanced Panel") res <- lapply(x, psmatCpp, by, NULL, transpose) } else { if(!is.nmfactor(t)) if(is.atomic(t)) t <- qF(t, na.exclude = FALSE) else if(is_GRP(t)) t <- as_factor_GRP(t) else t <- as_factor_GRP(GRP.default(t, call = FALSE)) res <- lapply(x, psmatCpp, by, t, transpose) } } if(array) { if(length(res) == 1L) return(res[[1L]]) else return(addAttributes(fsimplify2array(res), list(transpose = transpose, class = c("psmat","array")))) } else return(res) } psmat.pseries <- function(x, transpose = FALSE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) index <- unclass(getpix(attr(x, "index"))) if(is.matrix(x)) stop("x is already a matrix") if(length(index) > 2L) index <- list(finteraction(index[-length(index)]), index[[length(index)]]) .Call(Cpp_psmat, x, index[[1L]], index[[2L]], transpose) } psmat.pdata.frame <- function(x, cols = NULL, transpose = FALSE, array = TRUE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) oldClass(x) <- NULL index <- unclass(getpix(attr(x, "index"))) if(length(index) > 2L) index <- list(finteraction(index[-length(index)]), index[[length(index)]]) res <- lapply(if(is.null(cols)) x else x[cols2int(cols, x, names(x), FALSE)], psmatCpp, index[[1L]], index[[2L]], transpose) if(array) { if(length(res) == 1L) return(res[[1L]]) else return(addAttributes(fsimplify2array(res), list(transpose = transpose, class = c("psmat","array")))) } else return(res) } plot.psmat <- function(x, legend = FALSE, colours = legend, labs = NULL, grid = FALSE, ...) { d <- dim(x) arl <- length(d) == 3L if(isFALSE(attr(x, "transpose"))) { x <- if(arl) aperm(x, c(2L, 1L, 3L)) else t.default(x) d <- dim(x) } dn <- dimnames(x) colours <- if(isTRUE(colours)) rainbow(d[2L]) else if(isFALSE(colours)) TRUE else colours t <- as.numeric(dn[[1L]]) if(!is.na(t[1L])) { mint <- bmin(t) maxt <- bmax(t) } else { mint <- 1L maxt <- length(t) } ns <- d[2L] dots <- list(...) if(arl) { vars <- if(is.null(labs)) dn[[3L]] else labs nv <- d[3L] if(nv == 2L) mfr <- c(1L, 2L + legend) else if(nv + legend <= 4L) mfr <- c(2L, 2L) else { sqnv <- sqrt(nv) fsqnv <- floor(sqnv) mfr <- if(sqnv == fsqnv) c(fsqnv+legend,fsqnv) else c(fsqnv + 1L, fsqnv) } oldpar <- par(mfrow = mfr, mar = c(2.5, 2.5, 2.1, 1.5), mgp = c(2.5, 1, 0)) on.exit(par(oldpar)) for(i in seq_along(vars)) { ts.plot(ts(x[, , i], mint, maxt), main = vars[i], col = colours, xlab = NULL, ...) if(grid) grid() } if(legend) { plot(1:10, type = "n", axes = FALSE, xlab = NA, ylab = NA) legend(x = 0, y = if(nv == 2L) 10.5 else 10.75, # 'topleft', dn[[2L]], col = colours, lty = if(any(names(dots) == "lty")) dots[["lty"]] else 1L, cex= if(ns > 80L) 1-sqrt(ns)/sqrt(1150) else 1, bty = "n", xpd = TRUE, # y.intersp = 0.5, x.intersp = 0.5, ncol = if(ns <= 10L) 1L else if(nv == 2L) floor(ns^.32) else floor(ns^.39)) # .37 } } else { ts.plot(ts(x, mint, maxt), col = colours, ...) if(grid) grid() if(legend) legend('topleft', dn[[2L]], col = colours, lty = if(any(names(dots) == "lty")) dots[["lty"]] else 1L, cex= if(ns > 80L) 1-sqrt(ns)/sqrt(1150) else 1, bty = "n", xpd = TRUE, # y.intersp = 0.5, x.intersp = 0.5, ncol = if(d[2L] <= 10L) 1L else floor(d[2L]^.39)) #.37 } } # print.psmat <- print.qsu # nah, too expensive print.psmat <- function(x, digits = 3, ...) { print.default(`attr<-`(unclass(x), "transpose", NULL), digits = digits, ...) } `[.psmat` <- function(x, i, j, ..., drop = TRUE) { ret <- NextMethod() if(length(dim(ret)) > 1L) { attr(ret, "transpose") <- attr(x, "transpose") oldClass(ret) <- oldClass(x) } ret } aperm.psmat <- function(a, perm = NULL, resize = TRUE, keep.class = TRUE, ...) { r <- aperm.default(a, perm, resize = resize) if(keep.class) { attr(r, "transpose") <- attr(a, "transpose") oldClass(r) <- oldClass(a) } r } collapse/R/roworder_colorder_rename.R0000644000176200001440000001556414201327077017501 0ustar liggesusers roworder <- function(X, ..., na.last = TRUE) { ovars <- .c(...) if(!length(ovars)) stop("... needs to be comma-separated column names, optionally with a '-' prefix for descending order.") dec <- startsWith(ovars, "-") if(any(dec)) ovars[dec] <- substr(ovars[dec], 2L, 1000000L) z <- as.pairlist(.subset(X, ckmatch(ovars, attr(X, "names")))) o <- .Call(C_radixsort, na.last, dec, FALSE, FALSE, TRUE, z) if(!is.na(na.last) && attr(o, "sorted")) return(condalc(X, inherits(X, "data.table"))) rn <- attr(X, "row.names") if(is.numeric(rn) || is.null(rn) || rn[1L] == "1") return(.Call(C_subsetDT, X, o, seq_along(unclass(X)), FALSE)) return(`attr<-`(.Call(C_subsetDT, X, o, seq_along(unclass(X)), FALSE), "row.names", rn[o])) } posord <- function(sq, o, pos) switch(pos, front = c(o, sq[-o]), end = c(sq[-o], o), exchange = `[<-`(sq, o[forder.int(o)], value = o), after = { if(length(o) == 1L) stop('Need o supply at least 2 columns if pos = "after"') om1 <- o[-1L] smo <- sq[-om1] w1 <- whichv(smo, o[1L]) c(smo[1L:w1], om1, smo[(w1+1L):length(smo)]) }, stop("pos must be 'front', 'end', 'exchange' or 'after'.")) roworderv <- function(X, cols = NULL, neworder = NULL, decreasing = FALSE, na.last = TRUE, pos = "front") { if(is.null(neworder)) { if(is.null(cols)) { if(inherits(X, "sf")) { Xo <- X oldClass(Xo) <- NULL Xo[[attr(Xo, "sf_column")]] <- NULL neworder <- radixorderv(Xo, na.last, decreasing) } else neworder <- radixorderv(X, na.last, decreasing) } else neworder <- radixorderv(colsubset(X, cols), na.last, decreasing) if(!is.na(na.last) && attr(neworder, "sorted")) return(condalc(X, inherits(X, "data.table"))) } else { if(!is.integer(neworder)) neworder <- if(is.numeric(neworder)) as.integer(neworder) else if(is.logical(neworder)) which(neworder) else stop("neworder should be integer or logical.") if(length(neworder) != fnrow2(X)) neworder <- posord(seq_along(.subset2(X, 1L)), neworder, pos) } rn <- attr(X, "row.names") if(is.numeric(rn) || is.null(rn) || rn[1L] == "1") return(.Call(C_subsetDT, X, neworder, seq_along(unclass(X)), FALSE)) return(`attr<-`(.Call(C_subsetDT, X, neworder, seq_along(unclass(X)), FALSE), "row.names", rn[neworder])) } colorder <- function(.X, ..., pos = "front") { # This also takes names and indices .... ax <- attributes(.X) oldClass(.X) <- NULL # attributes ? nam <- names(.X) iX <- seq_along(.X) nl <- `names<-`(as.vector(iX, "list"), nam) vars <- eval(substitute(c(...)), nl, parent.frame()) if(!is.integer(vars)) stop(paste0("Unknown columns: ", .c(...))) if(length(names(vars))) { # Allow renaming during selection nam_vars <- names(vars) nonmiss <- nzchar(nam_vars) nam[vars[nonmiss]] <- nam_vars[nonmiss] } if(length(vars) != length(iX)) vars <- posord(iX, vars, pos) return(condalcSA(.X[vars], `[[<-`(ax, "names", nam[vars]), any(ax[["class"]] == "data.table"))) } colorderv <- function(X, neworder = radixorder(names(X)), pos = "front", regex = FALSE, ...) { # This also takes names and indices .... ax <- attributes(X) oldClass(X) <- NULL # attributes ? nam <- names(X) if(regex) vars <- rgrep(neworder, nam, ..., sort = FALSE) else { if(!missing(...)) unused_arg_action(match.call(), ...) vars <- cols2int(neworder, X, nam) } if(length(vars) != length(X)) vars <- posord(seq_along(X), vars, pos) return(condalcSA(X[vars], `[[<-`(ax, "names", nam[vars]), any(ax[["class"]] == "data.table"))) } frename <- function(.x, ..., cols = NULL) { args <- substitute(c(...))[-1L] nam <- attr(.x, "names") namarg <- names(args) if(is.null(namarg) || !all(nzchar(namarg))) { if(!is.function(..1)) stop("... needs to be expressions colname = newname, or a function to apply to the names of columns in cols.") FUN <- if(...length() == 1L) ..1 else # could do special case if ...length() == 2L function(x) do.call(..1, c(list(x), list(...)[-1L])) if(is.null(cols)) return(condalc(`attr<-`(.x, "names", FUN(nam)), inherits(.x, "data.table"))) ind <- cols2int(cols, .x, nam, FALSE) nam[ind] <- FUN(nam[ind]) } else nam[ckmatch(namarg, nam)] <- as.character(args) return(condalc(`attr<-`(.x, "names", nam), inherits(.x, "data.table"))) } rnm <- frename # rnm clashes with 2 packages.., rme would work but is inconsistent # A tiny bit faster than setrename <- function(.x, ..., cols = NULL) eval.parent(substitute(.x <- frename(.x, ..., cols = cols))), but not much... setrename <- function(.x, ..., cols = NULL) { args <- substitute(c(...))[-1L] nam <- attr(.x, "names") namarg <- names(args) if(is.null(namarg) || !all(nzchar(namarg))) { if(!is.function(..1)) stop("... needs to be expressions colname = newname, or a function to apply to the names of columns in cols.") FUN <- if(...length() == 1L) ..1 else # could do special case if ...length() == 2L function(x) do.call(..1, c(list(x), list(...)[-1L])) if(is.null(cols)) nam <- FUN(nam) else { ind <- cols2int(cols, .x, nam, FALSE) nam[ind] <- FUN(nam[ind]) } } else nam[ckmatch(namarg, nam)] <- as.character(args) # Need to allocate here, because the named are captured in ".internal.selfref", so modification be reference still produces an error. if(inherits(.x, "data.table")) assign(as.character(substitute(.x)), alc(`attr<-`(.x, "names", nam)), envir = parent.frame()) invisible(.Call(C_setnames, .x, nam)) } # setrnm <- setrename relabel <- function(.x, ..., cols = NULL, attrn = "label") { # , sc = TRUE args <- list(...) nam <- attr(.x, "names") namarg <- names(args) if(is.null(namarg) || !all(nzchar(namarg))) { if(!is.function(..1)) stop("... needs to be expressions colname = newname, or a function to apply to the names of columns in cols.") lab <- vlabels(.x, attrn, FALSE) FUN <- if(...length() == 1L) ..1 else # could do special case if ...length() == 2L function(x) do.call(..1, c(list(x), list(...)[-1L])) if(is.null(cols)) return(.Call(C_setvlabels, .x, attrn, FUN(lab), NULL)) ind <- cols2int(cols, .x, nam, FALSE) args <- FUN(lab[ind]) } else ind <- ckmatch(namarg, nam) .Call(C_setvlabels, .x, attrn, args, ind) } setrelabel <- function(.x, ..., cols = NULL, attrn = "label") invisible(relabel(.x, ..., cols = cols, attrn = attrn)) collapse/R/collap.R0000644000176200001440000006366714176656240013707 0ustar liggesusers# Need generic version for column-parallel apply and aggregating weights.. fsum_uw <- function(x, g, w, ...) fsum(x, g, ...) fprod_uw <- function(x, g, w, ...) fsum(x, g, ...) fmean_uw <- function(x, g, w, ...) fmean(x, g, ...) fmedian_uw <- function(x, g, w, ...) fmedian(x, g, ...) fvar_uw <- function(x, g, w, ...) fvar(x, g, ...) fsd_uw <- function(x, g, w, ...) fsd(x, g, ...) fmode_uw <- function(x, g, w, ...) fmode(x, g, ...) fnth_uw <- function(x, n, g, w, ...) fmode(x, n, g, ...) fmin_uw <- function(x, g, w, ...) fmin(x, g, ...) fmax_uw <- function(x, g, w, ...) fmax(x, g, ...) ffirst_uw <- function(x, g, w, ...) ffirst(x, g, ...) flast_uw <- function(x, g, w, ...) flast(x, g, ...) fnobs_uw <- function(x, g, w, ...) fnobs(x, g, ...) fndistinct_uw <- function(x, g, w, ...) fndistinct(x, g, ...) fNobs_uw <- function(x, g, w, ...) fnobs(x, g, ...) fNdistinct_uw <- function(x, g, w, ...) fndistinct(x, g, ...) mymatchfun <- function(FUN) { if(is.function(FUN)) return(FUN) switch(tochar(FUN), # cat(paste0(FSF, " = ", FSF, ",\n")) fmean = fmean, fmedian = fmedian, fmode = fmode, fsum = fsum, fprod = fprod, fsd = fsd, fvar = fvar, fmin = fmin, fmax = fmax, fnth = fnth, ffirst = ffirst, flast = flast, fnobs = fnobs, fndistinct = fndistinct, fNobs = fnobs, fNdistinct = fndistinct, # cat(paste0(paste0(FSF, "_uw"), " = ", paste0(FSF, "_uw"), ",\n")) fmean_uw = fmean_uw, fmedian_uw = fmedian_uw, fmode_uw = fmode_uw, fsum_uw = fsum_uw, fprod_uw = fprod_uw, fsd_uw = fsd_uw, fvar_uw = fvar_uw, fmin_uw = fmin_uw, fmax_uw = fmax_uw, fnth_uw = fnth_uw, ffirst_uw = ffirst_uw, flast_uw = flast_uw, fnobs_uw = fnobs_uw, fndistinct_uw = fndistinct_uw, fNobs_uw = fnobs_uw, fNdistinct_uw = fndistinct_uw, match.fun(FUN)) # get(FUN, mode = "function", envir = parent.frame(2)) -> no error message } # Column-level parallel implementation applyfuns_internal <- function(data, by, FUN, fFUN, parallel, cores, ...) { oldClass(data) <- "data.frame" # Needed for correct method dispatch for fast functions... if(is.list(FUN)) { if(parallel) return(lapply(seq_along(FUN), function(i) if(fFUN[i]) mclapply(data, FUN[[i]], g = by, ..., use.g.names = FALSE, mc.cores = cores) else mclapply(data, copysplaplfun, by, FUN[[i]], ..., mc.cores = cores))) # BY.data.frame(data, by, FUN[[i]], ..., use.g.names = FALSE, parallel = parallel, mc.cores = cores))) return(lapply(seq_along(FUN), function(i) if(fFUN[i]) FUN[[i]](data, g = by, ..., use.g.names = FALSE) else lapply(data, copysplaplfun, by, FUN[[i]], ...))) # BY.data.frame(data, by, FUN[[i]], ..., use.g.names = FALSE) } if(parallel) if(fFUN) return(list(mclapply(data, FUN, g = by, ..., use.g.names = FALSE, mc.cores = cores))) else return(list(mclapply(data, copysplaplfun, by, FUN, ..., mc.cores = cores))) # list(BY.data.frame(data, by, FUN, ..., use.g.names = FALSE, parallel = parallel, mc.cores = cores)) if(fFUN) return(list(FUN(data, g = by, ..., use.g.names = FALSE))) list(lapply(data, copysplaplfun, by, FUN, ...)) # list(BY.data.frame(data, by, FUN, ..., use.g.names = FALSE)) } # CHeck this : # X = wlddev; by = ~ iso3c; wFUN = .c(fmean, fsd); w = ~ ODA # NOTE: CUSTOM SEPARATOR doesn't work because of unlist() !!!!!!!!!!!! # keep.w toggle w being kept even if passed externally ? -> Also not done with W, B , etc !! -> but they also don't keep by .. collap <- function(X, by, FUN = fmean, catFUN = fmode, cols = NULL, w = NULL, wFUN = fsum, custom = NULL, keep.by = TRUE, keep.w = TRUE, keep.col.order = TRUE, sort = TRUE, decreasing = FALSE, na.last = TRUE, return.order = sort, method = "auto", parallel = FALSE, mc.cores = 2L, return = c("wide","list","long","long_dupl"), give.names = "auto", ...) { return <- switch(return[1L], wide = 1L, list = 2L, long = 3L, long_dupl = 4L, stop("Unknown return output option")) widel <- return == 1L ncustoml <- is.null(custom) autorn <- is.character(give.names) && give.names == "auto" nwl <- is.null(w) if(inherits(X, "data.frame")) DTl <- inherits(X, "data.table") else { X <- qDF(X) DTl <- FALSE } ax <- attributes(X) oldClass(X) <- NULL if(length(X[[1L]]) == 0L) stop("data passed to collap() has 0 rows.") #160, 0 rows can cause segfault... nam <- names(X) # attributes(X) <- NULL # attr(X, "class") <- "data.frame" # class needed for method dispatch of fast functions, not for BY ! # cl <- if(parallel) makeCluster(mc.cores) else NULL # aplyfun <- if(parallel) function(...) mclapply(..., mc.cores = mc.cores) else lapply # identifying by and cols vl <- TRUE bycalll <- is.call(by) if(bycalll) { if(length(by) == 3L) { v <- ckmatch(all.vars(by[[2L]]), nam) numby <- ckmatch(all.vars(by[[3L]]), nam) } else { numby <- ckmatch(all.vars(by), nam) if(ncustoml) v <- if(is.null(cols)) seq_along(X)[-numby] else cols2int(cols, X, nam) } by <- GRP.default(X, numby, sort, decreasing, na.last, keep.by, return.order, method, call = FALSE) } else if(is.atomic(by)) { numby <- 1L if(ncustoml) if(is.null(cols)) vl <- FALSE else v <- cols2int(cols, X, nam) by <- GRP.default(`names<-`(list(by), l1orlst(as.character(substitute(by)))), NULL, sort, decreasing, na.last, keep.by, return.order, method, call = FALSE) } else { if(ncustoml) if(is.null(cols)) vl <- FALSE else v <- cols2int(cols, X, nam) if(!is_GRP(by)) { numby <- seq_along(unclass(by)) by <- GRP.default(by, numby, sort, decreasing, na.last, keep.by, return.order, method, call = FALSE) } else numby <- seq_along(by[[5L]]) } if(!nwl) { if(is.call(w)) { namw <- all.vars(w) numw <- ckmatch(namw, nam) if(vl && ncustoml) v <- v[v != numw] w <- X[[numw]] } else if(keep.w) { numw <- 0L # length(X) + 1L namw <- l1orlst(as.character(substitute(w))) } if(keep.w) { # what about function name for give.names ?? What about give.names and custom ??? if(is.function(wFUN)) { namwFUN <- l1orn(as.character(substitute(wFUN)), "wFUN") } else if(is.character(wFUN)) { namwFUN <- wFUN wFUN <- if(length(wFUN) > 1L) lapply(wFUN, mymatchfun) else mymatchfun(wFUN) } else if(is.list(wFUN)) { namwFUN <- names(wFUN) if(is.null(namwFUN)) namwFUN <- all.vars(substitute(wFUN)) } else stop("wFUN needs to be a function, character vector of function names or list of functions!") if(!all(namwFUN %in% .FAST_STAT_FUN_EXT)) stop("wFUN needs to be fast statistical functions, see print(.FAST_STAT_FUN)") if(is.list(wFUN)) { namw <- paste(namwFUN, namw, sep = ".") by[[4L]] <- c(if(keep.by) by[[4L]], `names<-`(lapply(wFUN, function(f) f(w, g = by, ..., use.g.names = FALSE)), namw)) if(keep.col.order) numby <- c(if(keep.by) numby, rep_len(numw, length(wFUN))) } else { if(isTRUE(give.names)) namw <- paste(namwFUN, namw, sep = ".") by[[4L]] <- c(if(keep.by) by[[4L]], `names<-`(list(wFUN(w, g = by, ..., use.g.names = FALSE)), namw)) if(keep.col.order) numby <- c(if(keep.by) numby, numw) # need to accommodate any option of keep.by, keep.w and keep.col.order } keep.by <- TRUE } } if(ncustoml) { # Identifying data nu <- .Call(C_vtypes, X, 1L) # vapply(unattrib(X), is.numeric, TRUE) if(vl) { temp <- nu[v] nnu <- v[!temp] # which(!nu & v) # faster way ? nu <- v[temp] # which(nu & v) rm(temp, v) } else { nnu <- whichv(nu, FALSE) nu <- which(nu) } nul <- length(nu) > 0L nnul <- length(nnu) > 0L # Identifying FUN and catFUN: if(nul) if(is.function(FUN)) { namFUN <- l1orn(as.character(substitute(FUN)), "FUN") } else if(is.character(FUN)) { # FUN <- unlist(strsplit(FUN,",",fixed = TRUE), use.names = FALSE) namFUN <- FUN FUN <- if(length(FUN) > 1L) lapply(FUN, mymatchfun) else mymatchfun(FUN) } else if(is.list(FUN)) { namFUN <- names(FUN) if(is.null(namFUN)) namFUN <- all.vars(substitute(FUN)) } else stop("FUN needs to be a function, character vector of function names or list of functions!") if(nnul) if(is.function(catFUN)) { namcatFUN <- l1orn(as.character(substitute(catFUN)), "catFUN") } else if(is.character(catFUN)) { # catFUN <- unlist(strsplit(catFUN,",",fixed = TRUE), use.names = FALSE) namcatFUN <- catFUN catFUN <- if(length(catFUN) > 1L) lapply(catFUN, mymatchfun) else mymatchfun(catFUN) } else if(is.list(catFUN)) { namcatFUN <- names(catFUN) if(is.null(namcatFUN)) namcatFUN <- all.vars(substitute(catFUN)) } else stop("FUN needs to be a function, character vector of function names or list of functions!") if(autorn) give.names <- !widel || length(FUN) > 1L || length(catFUN) > 1L # Aggregator function # drop level of nesting i.e. make rest length(by)+length(FUN)+length(catFUN) ? agg <- function(xnu, xnnu, ...) { # by, FUN, namFUN, catFUN, namcatFUN, drop.by lr <- nul + nnul + keep.by res <- vector("list", lr) if(keep.by) { res[[1L]] <- list(by[[4L]]) # could add later using "c" ? ind <- 2L } else ind <- 1L if(nul) res[[ind]] <- condsetn(applyfuns_internal(xnu, by, FUN, namFUN %in% .FAST_STAT_FUN_EXT, parallel, mc.cores, ...), namFUN, give.names) if(nnul) res[[lr]] <- condsetn(applyfuns_internal(xnnu, by, catFUN, namcatFUN %in% .FAST_STAT_FUN_EXT, parallel, mc.cores, ...), namcatFUN, give.names) return(res) } # fastest using res list ?? or better combine at the end ?? # Fixes https://github.com/SebKrantz/collapse/issues/185 if(widel && !give.names && ((length(nu) == 1L && !nnul && length(FUN) > 1L) || (length(nnu) == 1L && !nul && length(catFUN) > 1L))) { names(X) <- NULL give.names <- TRUE } if(nwl) { res <- agg(if(nul) X[nu] else NULL, if(nnul) X[nnu] else NULL, ...) } else { res <- agg(if(nul) X[nu] else NULL, if(nnul) X[nnu] else NULL, w = w, ...) } if(keep.col.order && widel) o <- forder.int(c(if(!keep.by) NULL else if(!bycalll) rep(0L,length(numby)) else numby, if(nul) rep(nu,length(FUN)) else NULL, if(nnul) rep(nnu,length(catFUN)) else NULL)) } else { # custom aggregation: namFUN <- names(custom) if(!is.list(custom) || is.null(namFUN)) stop("custom needs to be a named list, see ?collap") fFUN <- namFUN %in% .FAST_STAT_FUN_EXT if(!keep.by) { res <- vector("list", 1L) ind <- 1L } else { res <- vector("list", 2L) res[[1L]] <- list(by[[4L]]) # could add later using "c" ? ind <- 2L } custom_names <- lapply(custom, names) custom <- lapply(custom, cols2int, X, nam) # could integrate below, but then reorder doesn't work ! # if(autorn) give.names <- fanyDuplicated(unlist(custom, FALSE, FALSE)) #lx <- length(X) # custom <- lapply(custom, function(x) if(is.numeric(x) && bmax(abs(x)) <= lx) # x else if(is.character(x)) ckmatch(x, nam) else # stop("custom list content must be variable names or suitable column indices")) if(nwl) { res[[ind]] <- lapply(seq_along(namFUN), function(i) applyfuns_internal(setnck(X[custom[[i]]], custom_names[[i]]), by, mymatchfun(namFUN[i]), fFUN[i], parallel, mc.cores, ...)[[1L]]) } else { if(!all(fFUN)) warning("collap can only perform weighted aggregations with the fast statistical functions (see .FAST_STAT_FUN): Ignoring weights argument to other functions") res[[ind]] <- lapply(seq_along(namFUN), function(i) applyfuns_internal(setnck(X[custom[[i]]], custom_names[[i]]), by, mymatchfun(namFUN[i]), fFUN[i], parallel, mc.cores, w = w, ...)[[1L]]) } # Better to do this check afterwards, because custom names may make column names unique... if(autorn) give.names <- fanyDuplicated(unlist(lapply(res[[ind]], attr, "names"), FALSE, FALSE)) if(give.names) names(res[[ind]]) <- namFUN if(keep.col.order && return != 2L) { # && widel o <- unlist(custom, use.names = FALSE) o <- forder.int(c(if(!keep.by) NULL else if(!bycalll) rep(0L,length(numby)) else numby, if(widel) o else unique.default(o))) } } # if(parallel) stopCluster(cl) if(widel) res <- unlist(unlist(res, FALSE), FALSE) else { if(length(FUN) > 1L || length(catFUN) > 1L || length(custom) > 1L) { res <- unlist(res, FALSE) if(return == 2L) { ax[["row.names"]] <- .set_row_names(by[[1L]]) if(!keep.by) return(lapply(res, function(e) { ax[["names"]] <- names(e) condalcSA(e, ax, DTl) })) namby <- attr(res[[1L]], "names") # always works ?? return(lapply(res[-1L], function(e) { ax[["names"]] <- c(namby, names(e)) condalcSA(c(res[[1L]], e), ax, DTl) })) } else { if(return != 4L) { res <- if(!keep.by) .Call(C_rbindlist, res, TRUE, TRUE, "Function") else # data.table:::Crbindlist .Call(C_rbindlist, lapply(res[-1L], function(e) c(res[[1L]], e)), TRUE, TRUE, "Function") } else { if(!ncustoml || !(nul && nnul)) stop("long_dupl is only meaningful for aggregations with both numeric and categorical data, and multiple functions used for only one of the two data types!") mFUN <- length(FUN) > 1L nid <- if(mFUN) length(res) else 2L-!keep.by if(!keep.by) { res <- if(mFUN) lapply(res[-nid], function(e) c(e, res[[nid]])) else lapply(res[-nid], function(e) c(res[[nid]], e)) } else res <- if(mFUN) lapply(res[-c(nid, 1L)], function(e) c(res[[1L]], e, res[[nid]])) else lapply(res[-c(nid, 1L)], function(e) c(res[[1L]], res[[nid]], e)) res <- .Call(C_rbindlist, res, FALSE, FALSE, "Function") } if(keep.col.order) o <- if(ncustoml) forder.int(c(0L, if(!keep.by) NULL else if(!bycalll) rep(0L,length(numby)) else numby, nu, nnu)) else c(1L, o + 1L) } } else message("return options other than 'wide' are only meaningful if multiple functions are used!") } if(keep.col.order) .Call(C_setcolorder, res, o) # data.table:::Csetcolorder ax[["names"]] <- names(res) ax[["row.names"]] <- .set_row_names(length(res[[1L]])) return(condalcSA(res, ax, DTl)) } # collapv: allows vector input to by and w collapv <- function(X, by, FUN = fmean, catFUN = fmode, cols = NULL, w = NULL, wFUN = fsum, custom = NULL, keep.by = TRUE, keep.w = TRUE, keep.col.order = TRUE, sort = TRUE, decreasing = FALSE, na.last = TRUE, return.order = sort, method = "auto", parallel = FALSE, mc.cores = 2L, return = c("wide","list","long","long_dupl"), give.names = "auto", ...) { return <- switch(return[1L], wide = 1L, list = 2L, long = 3L, long_dupl = 4L, stop("Unknown return output option")) widel <- return == 1L ncustoml <- is.null(custom) autorn <- is.character(give.names) && give.names == "auto" nwl <- is.null(w) if(inherits(X, "data.frame")) DTl <- inherits(X, "data.table") else { X <- qDF(X) DTl <- FALSE } ax <- attributes(X) oldClass(X) <- NULL if(length(X[[1L]]) == 0L) stop("data passed to collapv() has 0 rows.") #160, 0 rows can cause segfault... nam <- names(X) aplyfun <- if(parallel) function(...) mclapply(..., mc.cores = mc.cores) else lapply # identifying by numby <- cols2int(by, X, nam) by <- GRP.default(X, numby, sort, decreasing, na.last, keep.by, return.order, method, call = FALSE) if(ncustoml) v <- if(is.null(cols)) seq_along(X)[-numby] else cols2int(cols, X, nam) if(!nwl) { if(length(w) == 1L) { numw <- cols2int(w, X, nam) namw <- nam[numw] if(ncustoml) v <- v[v != numw] w <- X[[numw]] } else if(keep.w) { numw <- 0L namw <- l1orlst(as.character(substitute(w))) } if(keep.w) { if(is.function(wFUN)) { namwFUN <- l1orn(as.character(substitute(wFUN)), "wFUN") } else if(is.character(wFUN)) { namwFUN <- wFUN wFUN <- if(length(wFUN) > 1L) lapply(wFUN, mymatchfun) else mymatchfun(wFUN) } else if(is.list(wFUN)) { namwFUN <- names(wFUN) if(is.null(namwFUN)) namwFUN <- all.vars(substitute(wFUN)) } else stop("wFUN needs to be a function, character vector of function names or list of functions!") if(!all(namwFUN %in% .FAST_STAT_FUN_EXT)) stop("wFUN needs to be fast statistical functions, see print(.FAST_STAT_FUN)") if(is.list(wFUN)) { namw <- paste(namwFUN, namw, sep = ".") by[[4L]] <- c(if(keep.by) by[[4L]], `names<-`(lapply(wFUN, function(f) f(w, g = by, ..., use.g.names = FALSE)), namw)) if(keep.col.order) numby <- c(if(keep.by) numby, rep_len(numw, length(wFUN))) } else { if(isTRUE(give.names)) namw <- paste(namwFUN, namw, sep = ".") by[[4L]] <- c(if(keep.by) by[[4L]], `names<-`(list(wFUN(w, g = by, ..., use.g.names = FALSE)), namw)) if(keep.col.order) numby <- c(if(keep.by) numby, numw) } keep.by <- TRUE } } if(ncustoml) { # Identifying data nu <- .Call(C_vtypes, X, 1L) # vapply(unattrib(X), is.numeric, TRUE) temp <- nu[v] nnu <- v[!temp] # which(!nu & v) # faster way ? nu <- v[temp] # which(nu & v) rm(temp, v) nul <- length(nu) > 0L nnul <- length(nnu) > 0L # Identifying FUN and catFUN: if(nul) if(is.function(FUN)) { namFUN <- l1orn(as.character(substitute(FUN)), "FUN") } else if(is.character(FUN)) { namFUN <- FUN FUN <- if(length(FUN) > 1L) lapply(FUN, mymatchfun) else mymatchfun(FUN) } else if(is.list(FUN)) { namFUN <- names(FUN) if(is.null(namFUN)) namFUN <- all.vars(substitute(FUN)) } else stop("FUN needs to be a function, character vector of function names or list of functions!") if(nnul) if(is.function(catFUN)) { namcatFUN <- l1orn(as.character(substitute(catFUN)), "catFUN") } else if(is.character(catFUN)) { namcatFUN <- catFUN catFUN <- if(length(catFUN) > 1L) lapply(catFUN, mymatchfun) else mymatchfun(catFUN) } else if(is.list(catFUN)) { namcatFUN <- names(catFUN) if(is.null(namcatFUN)) namcatFUN <- all.vars(substitute(catFUN)) } else stop("FUN needs to be a function, character vector of function names or list of functions!") if(autorn) give.names <- !widel || length(FUN) > 1L || length(catFUN) > 1L # Aggregator function agg <- function(xnu, xnnu, ...) { lr <- nul + nnul + keep.by res <- vector("list", lr) if(keep.by) { res[[1L]] <- list(by[[4L]]) ind <- 2L } else ind <- 1L if(nul) res[[ind]] <- condsetn(applyfuns_internal(xnu, by, FUN, namFUN %in% .FAST_STAT_FUN_EXT, parallel, mc.cores, ...), namFUN, give.names) if(nnul) res[[lr]] <- condsetn(applyfuns_internal(xnnu, by, catFUN, namcatFUN %in% .FAST_STAT_FUN_EXT, parallel, mc.cores, ...), namcatFUN, give.names) return(res) } # Fixes https://github.com/SebKrantz/collapse/issues/185 if(widel && !give.names && ((length(nu) == 1L && !nnul && length(FUN) > 1L) || (length(nnu) == 1L && !nul && length(catFUN) > 1L))) { names(X) <- NULL give.names <- TRUE } if(nwl) { res <- agg(if(nul) X[nu] else NULL, if(nnul) X[nnu] else NULL, ...) } else { res <- agg(if(nul) X[nu] else NULL, if(nnul) X[nnu] else NULL, w = w, ...) } if(keep.col.order && widel) o <- forder.int(c(if(!keep.by) NULL else numby, if(nul) rep(nu,length(FUN)) else NULL, if(nnul) rep(nnu,length(catFUN)) else NULL)) } else { # custom aggregation: namFUN <- names(custom) if(!is.list(custom) || is.null(namFUN)) stop("custom needs to be a named list, see ?collap") fFUN <- namFUN %in% .FAST_STAT_FUN_EXT if(!keep.by) { res <- vector("list", 1L) ind <- 1L } else { res <- vector("list", 2L) res[[1L]] <- list(by[[4L]]) ind <- 2L } custom_names <- lapply(custom, names) custom <- lapply(custom, cols2int, X, nam) if(nwl) { res[[ind]] <- lapply(seq_along(namFUN), function(i) applyfuns_internal(setnck(X[custom[[i]]], custom_names[[i]]), by, mymatchfun(namFUN[i]), fFUN[i], parallel, mc.cores, ...)[[1L]]) } else { if(!all(fFUN)) warning("collap can only perform weighted aggregations with the fast statistical functions (see .FAST_STAT_FUN): Ignoring weights argument to other functions") res[[ind]] <- lapply(seq_along(namFUN), function(i) applyfuns_internal(setnck(X[custom[[i]]], custom_names[[i]]), by, mymatchfun(namFUN[i]), fFUN[i], parallel, mc.cores, w = w, ...)[[1L]]) } # Better to do this check afterwards, because custom names may make column names unique... if(autorn) give.names <- fanyDuplicated(unlist(lapply(res[[ind]], attr, "names"), FALSE, FALSE)) if(give.names) names(res[[ind]]) <- namFUN if(keep.col.order && return != 2L) { o <- unlist(custom, use.names = FALSE) o <- forder.int(c(if(!keep.by) NULL else numby, if(widel) o else unique.default(o))) } } if(widel) res <- unlist(unlist(res, FALSE), FALSE) else { if(length(FUN) > 1L || length(catFUN) > 1L || length(custom) > 1L) { res <- unlist(res, FALSE) if(return == 2L) { ax[["row.names"]] <- .set_row_names(by[[1L]]) if(!keep.by) return(lapply(res, function(e) { ax[["names"]] <- names(e) condalcSA(e, ax, DTl) })) namby <- attr(res[[1L]], "names") # always works ?? return(lapply(res[-1L], function(e) { ax[["names"]] <- c(namby, names(e)) condalcSA(c(res[[1L]], e), ax, DTl) })) } else { if(return != 4L) { res <- if(!keep.by) .Call(C_rbindlist, res, TRUE, TRUE, "Function") else # data.table:::Crbindlist .Call(C_rbindlist, lapply(res[-1L], function(e) c(res[[1L]], e)), TRUE, TRUE, "Function") } else { if(!ncustoml || !(nul && nnul)) stop("long_dupl is only meaningful for aggregations with both numeric and categorical data, and multiple functions used for only one of the two data types!") mFUN <- length(FUN) > 1L nid <- if(mFUN) length(res) else 2L-!keep.by if(!keep.by) { res <- if(mFUN) lapply(res[-nid], function(e) c(e, res[[nid]])) else lapply(res[-nid], function(e) c(res[[nid]], e)) } else res <- if(mFUN) lapply(res[-c(nid, 1L)], function(e) c(res[[1L]], e, res[[nid]])) else lapply(res[-c(nid, 1L)], function(e) c(res[[1L]], res[[nid]], e)) res <- .Call(C_rbindlist, res, FALSE, FALSE, "Function") } if(keep.col.order) o <- if(ncustoml) forder.int(c(0L, if(!keep.by) NULL else numby, nu, nnu)) else c(1L, o + 1L) } } else message("return options other than 'wide' are only meaningful if multiple functions are used!") } if(keep.col.order) .Call(C_setcolorder, res, o) # data.table:::Csetcolorder ax[["names"]] <- names(res) ax[["row.names"]] <- .set_row_names(length(res[[1L]])) return(condalcSA(res, ax, DTl)) } # For dplyr integration: takes grouped_df as input collapg <- function(X, FUN = fmean, catFUN = fmode, cols = NULL, w = NULL, wFUN = fsum, custom = NULL, keep.group_vars = TRUE, keep.w = TRUE, keep.col.order = TRUE, parallel = FALSE, mc.cores = 2L, return = c("wide","list","long","long_dupl"), give.names = "auto", ...) { by <- GRP.grouped_df(X, return.groups = keep.group_vars, call = FALSE) if(is.null(custom)) ngn <- attr(X, "names") %!in% by[[5L]] # Note: this always leaves grouping columns on the left still ! # clx <- oldClass(X) attr(X, "groups") <- NULL oldClass(X) <- fsetdiff(oldClass(X), c("GRP_df", "grouped_df")) # clx[clx != "grouped_df"] if(length(wsym <- as.character(substitute(w))) == 1L) { # Non-standard evaluation of w argument if(any(windl <- wsym == attr(X, "names"))) { assign(wsym, .subset2(X, wsym)) # needs to be here !! (before subsetting!!) if(is.null(custom)) X <- fcolsubset(X, ngn & !windl) # else X <- X # Needed ?? -> nope !! expr <- substitute(collap(X, by, FUN, catFUN, cols, NULL, wFUN, custom, keep.group_vars, keep.w, keep.col.order, TRUE, FALSE, TRUE, TRUE, "auto", parallel, mc.cores, return, give.names, ...)) expr[[7L]] <- as.symbol(wsym) # best solution !! return(eval(expr)) } } if(is.null(custom)) X <- fcolsubset(X, ngn) # else X <- X # because of non-standard eval.. X is "." return(eval(substitute(collap(X, by, FUN, catFUN, cols, w, wFUN, custom, keep.group_vars, keep.w, keep.col.order, TRUE, FALSE, TRUE, TRUE, "auto", parallel, mc.cores, return, give.names, ...)))) } collapse/R/fbetween_fwithin.R0000644000176200001440000004212314174223734015740 0ustar liggesusers ckm <- function(x) if(is.double(x)) x else if(is.character(x) && x == "overall.mean") -Inf else stop("mean must be a number or 'overall.mean'") # better than switch !! # Note: for principal innovations of this code see fsum.R and fscale.R fwithin <- function(x, ...) UseMethod("fwithin") # , x fwithin.default <- function(x, g = NULL, w = NULL, na.rm = TRUE, mean = 0, theta = 1, ...) { if(is.matrix(x) && !inherits(x, "matrix")) return(fwithin.matrix(x, g, w, na.rm, mean, theta, ...)) if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(g)) return(.Call(Cpp_BW,x,0L,0L,NULL,w,na.rm,theta,ckm(mean),FALSE,FALSE)) g <- G_guo(g) .Call(Cpp_BW,x,g[[1L]],g[[2L]],g[[3L]],w,na.rm,theta,ckm(mean),FALSE,FALSE) } fwithin.pseries <- function(x, effect = 1L, w = NULL, na.rm = TRUE, mean = 0, theta = 1, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) g <- if(length(effect) == 1L) .subset2(getpix(attr(x, "index")), effect) else finteraction(.subset(getpix(attr(x, "index")), effect)) .Call(Cpp_BW,x,fnlevels(g),g,NULL,w,na.rm,theta,ckm(mean),FALSE,FALSE) } fwithin.matrix <- function(x, g = NULL, w = NULL, na.rm = TRUE, mean = 0, theta = 1, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(g)) return(.Call(Cpp_BWm,x,0L,0L,NULL,w,na.rm,theta,ckm(mean),FALSE,FALSE)) g <- G_guo(g) .Call(Cpp_BWm,x,g[[1L]],g[[2L]],g[[3L]],w,na.rm,theta,ckm(mean),FALSE,FALSE) } fwithin.data.frame <- function(x, g = NULL, w = NULL, na.rm = TRUE, mean = 0, theta = 1, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(g)) return(.Call(Cpp_BWl,x,0L,0L,NULL,w,na.rm,theta,ckm(mean),FALSE,FALSE)) g <- G_guo(g) .Call(Cpp_BWl,x,g[[1L]],g[[2L]],g[[3L]],w,na.rm,theta,ckm(mean),FALSE,FALSE) } fwithin.list <- function(x, g = NULL, w = NULL, na.rm = TRUE, mean = 0, theta = 1, ...) fwithin.data.frame(x, g, w, na.rm, mean, theta, ...) fwithin.pdata.frame <- function(x, effect = 1L, w = NULL, na.rm = TRUE, mean = 0, theta = 1, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) g <- if(length(effect) == 1L) .subset2(getpix(attr(x, "index")), effect) else finteraction(.subset(getpix(attr(x, "index")), effect)) .Call(Cpp_BWl,x,fnlevels(g),g,NULL,w,na.rm,theta,ckm(mean),FALSE,FALSE) } fwithin.grouped_df <- function(x, w = NULL, na.rm = TRUE, mean = 0, theta = 1, keep.group_vars = TRUE, keep.w = TRUE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) g <- GRP.grouped_df(x, call = FALSE) wsym <- l1orn(as.character(substitute(w)), NULL) nam <- attr(x, "names") gn2 <- which(nam %in% g[[5L]]) gn <- if(keep.group_vars) gn2 else NULL if(length(wsym) && length(wn <- whichv(nam, wsym))) { w <- .subset2(x, wn) if(any(gn2 == wn)) stop("Weights coincide with grouping variables!") gn2 <- c(gn2,wn) if(keep.w) gn <- c(gn,wn) } if(length(gn2)) { ax <- attributes(x) ax[["names"]] <- c(nam[gn], nam[-gn2]) # first term is removed if !length(gn) res <- .Call(Cpp_BWl, .subset(x, -gn2), g[[1L]],g[[2L]],g[[3L]],w,na.rm,theta,ckm(mean),FALSE,FALSE) if(length(gn)) return(setAttributes(c(.subset(x, gn), res), ax)) else return(setAttributes(res, ax)) } .Call(Cpp_BWl,x,g[[1L]],g[[2L]],g[[3L]],w,na.rm,theta,ckm(mean),FALSE,FALSE) } # Within Operator W <- function(x, ...) UseMethod("W") # , x W.default <- function(x, g = NULL, w = NULL, na.rm = TRUE, mean = 0, theta = 1, ...) { if(is.matrix(x) && !inherits(x, "matrix")) return(W.matrix(x, g, w, na.rm, mean, theta, ...)) fwithin.default(x, g, w, na.rm, mean, theta, ...) } W.pseries <- function(x, effect = 1L, w = NULL, na.rm = TRUE, mean = 0, theta = 1, ...) fwithin.pseries(x, effect, w, na.rm, mean, theta, ...) W.matrix <- function(x, g = NULL, w = NULL, na.rm = TRUE, mean = 0, theta = 1, stub = "W.", ...) add_stub(fwithin.matrix(x, g, w, na.rm, mean, theta, ...), stub) W.grouped_df <- function(x, w = NULL, na.rm = TRUE, mean = 0, theta = 1, stub = "W.", keep.group_vars = TRUE, keep.w = TRUE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) g <- GRP.grouped_df(x, call = FALSE) wsym <- l1orn(as.character(substitute(w)), NULL) nam <- attr(x, "names") gn2 <- which(nam %in% g[[5L]]) gn <- if(keep.group_vars) gn2 else NULL if(length(wsym) && length(wn <- whichv(nam, wsym))) { w <- .subset2(x, wn) if(any(gn2 == wn)) stop("Weights coincide with grouping variables!") gn2 <- c(gn2,wn) if(keep.w) gn <- c(gn, wn) } if(length(gn2)) { ax <- attributes(x) ax[["names"]] <- c(nam[gn], if(is.character(stub)) paste0(stub, nam[-gn2]) else nam[-gn2]) res <- .Call(Cpp_BWl, .subset(x, -gn2), g[[1L]],g[[2L]],g[[3L]],w,na.rm,theta,ckm(mean),FALSE,FALSE) if(length(gn)) return(setAttributes(c(.subset(x, gn), res), ax)) else return(setAttributes(res, ax)) } add_stub(.Call(Cpp_BWl,x,g[[1L]],g[[2L]],g[[3L]],w,na.rm,theta,ckm(mean),FALSE,FALSE), stub) } W.pdata.frame <- function(x, effect = 1L, w = NULL, cols = is.numeric, na.rm = TRUE, mean = 0, theta = 1, stub = "W.", keep.ids = TRUE, keep.w = TRUE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) ax <- attributes(x) class(x) <- NULL nam <- names(x) g <- if(length(effect) == 1L) .subset2(getpix(ax[["index"]]), effect) else finteraction(.subset(getpix(ax[["index"]]), effect)) if(keep.ids) { gn <- which(nam %in% attr(getpix(ax[["index"]]), "names")) if(length(gn) && is.null(cols)) cols <- seq_along(x)[-gn] } else gn <- NULL if(length(cols)) cols <- cols2int(cols, x, nam) if(is.call(w)) { wn <- ckmatch(all.vars(w), nam) w <- x[[wn]] cols <- if(is.null(cols)) seq_along(x)[-wn] else cols[cols != wn] if(keep.w) gn <- c(gn, wn) } if(length(gn) && length(cols)) { ax[["names"]] <- c(nam[gn], if(is.character(stub)) paste0(stub, nam[cols]) else nam[cols]) return(setAttributes(c(x[gn], .Call(Cpp_BWl,x[cols],fnlevels(g),g,NULL,w,na.rm,theta,ckm(mean),FALSE,FALSE)), ax)) } else if(!length(gn)) { ax[["names"]] <- if(is.character(stub)) paste0(stub, nam[cols]) else nam[cols] return(setAttributes(.Call(Cpp_BWl,x[cols],fnlevels(g),g,NULL,w,na.rm,theta,ckm(mean),FALSE,FALSE), ax)) } else if(is.character(stub)) { ax[["names"]] <- paste0(stub, nam) return(setAttributes(.Call(Cpp_BWl,x,fnlevels(g),g,NULL,w,na.rm,theta,ckm(mean),FALSE,FALSE), ax)) } else return(.Call(Cpp_BWl,`oldClass<-`(x, ax[["class"]]),fnlevels(g),g,NULL,w,na.rm,theta,ckm(mean),FALSE,FALSE)) } W.data.frame <- function(x, by = NULL, w = NULL, cols = is.numeric, na.rm = TRUE, mean = 0, theta = 1, stub = "W.", keep.by = TRUE, keep.w = TRUE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) if(is.call(by) || is.call(w)) { ax <- attributes(x) class(x) <- NULL nam <- names(x) if(is.call(by)) { if(length(by) == 3L) { cols <- ckmatch(all.vars(by[[2L]]), nam) gn <- ckmatch(all.vars(by[[3L]]), nam) } else { gn <- ckmatch(all.vars(by), nam) cols <- if(is.null(cols)) seq_along(x)[-gn] else cols2int(cols, x, nam) } by <- G_guo(if(length(gn) == 1L) x[[gn]] else x[gn]) if(!keep.by) gn <- NULL } else { gn <- NULL if(length(cols)) cols <- cols2int(cols, x, nam) by <- if(is.null(by)) list(0L, 0L, NULL) else G_guo(by) } if(is.call(w)) { wn <- ckmatch(all.vars(w), nam) w <- x[[wn]] cols <- if(is.null(cols)) seq_along(x)[-wn] else cols[cols != wn] if(keep.w) gn <- c(gn, wn) } if(length(gn)) { ax[["names"]] <- c(nam[gn], if(is.character(stub)) paste0(stub, nam[cols]) else nam[cols]) return(setAttributes(c(x[gn], .Call(Cpp_BWl,x[cols],by[[1L]],by[[2L]],by[[3L]],w,na.rm,theta,ckm(mean),FALSE,FALSE)), ax)) } ax[["names"]] <- if(is.character(stub)) paste0(stub, nam[cols]) else nam[cols] return(setAttributes(.Call(Cpp_BWl,x[cols],by[[1L]],by[[2L]],by[[3L]],w,na.rm,theta,ckm(mean),FALSE,FALSE), ax)) } else if(length(cols)) { # Need to do like this, otherwise list-subsetting drops attributes ! ax <- attributes(x) class(x) <- NULL x <- x[cols2int(cols, x, names(x), FALSE)] ax[["names"]] <- names(x) setattributes(x, ax) } if(is.character(stub)) attr(x, "names") <- paste0(stub, attr(x, "names")) if(is.null(by)) return(.Call(Cpp_BWl,x,0L,0L,NULL,w,na.rm,theta,ckm(mean),FALSE,FALSE)) by <- G_guo(by) .Call(Cpp_BWl,x,by[[1L]],by[[2L]],by[[3L]],w,na.rm,theta,ckm(mean),FALSE,FALSE) } W.list <- function(x, by = NULL, w = NULL, cols = is.numeric, na.rm = TRUE, mean = 0, theta = 1, stub = "W.", keep.by = TRUE, keep.w = TRUE, ...) W.data.frame(x, by, w, cols, na.rm, mean, theta, stub, keep.by, keep.w, ...) fbetween <- function(x, ...) UseMethod("fbetween") # , x fbetween.default <- function(x, g = NULL, w = NULL, na.rm = TRUE, fill = FALSE, ...) { if(is.matrix(x) && !inherits(x, "matrix")) return(fbetween.matrix(x, g, w, na.rm, fill, ...)) if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(g)) return(.Call(Cpp_BW,x,0L,0L,NULL,w,na.rm,1,0,TRUE,fill)) g <- G_guo(g) .Call(Cpp_BW,x,g[[1L]],g[[2L]],g[[3L]],w,na.rm,1,0,TRUE,fill) } fbetween.pseries <- function(x, effect = 1L, w = NULL, na.rm = TRUE, fill = FALSE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) g <- if(length(effect) == 1L) .subset2(getpix(attr(x, "index")), effect) else finteraction(.subset(getpix(attr(x, "index")), effect)) .Call(Cpp_BW,x,fnlevels(g),g,NULL,w,na.rm,1,0,TRUE,fill) } fbetween.matrix <- function(x, g = NULL, w = NULL, na.rm = TRUE, fill = FALSE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(g)) return(.Call(Cpp_BWm,x,0L,0L,NULL,w,na.rm,1,0,TRUE,fill)) g <- G_guo(g) .Call(Cpp_BWm,x,g[[1L]],g[[2L]],g[[3L]],w,na.rm,1,0,TRUE,fill) } fbetween.data.frame <- function(x, g = NULL, w = NULL, na.rm = TRUE, fill = FALSE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(g)) return(.Call(Cpp_BWl,x,0L,0L,NULL,w,na.rm,1,0,TRUE,fill)) g <- G_guo(g) .Call(Cpp_BWl,x,g[[1L]],g[[2L]],g[[3L]],w,na.rm,1,0,TRUE,fill) } fbetween.list <- function(x, g = NULL, w = NULL, na.rm = TRUE, fill = FALSE, ...) fbetween.data.frame(x, g, w, na.rm, fill, ...) fbetween.pdata.frame <- function(x, effect = 1L, w = NULL, na.rm = TRUE, fill = FALSE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) g <- if(length(effect) == 1L) .subset2(getpix(attr(x, "index")), effect) else finteraction(.subset(getpix(attr(x, "index")), effect)) .Call(Cpp_BWl,x,fnlevels(g),g,NULL,w,na.rm,1,0,TRUE,fill) } fbetween.grouped_df <- function(x, w = NULL, na.rm = TRUE, fill = FALSE, keep.group_vars = TRUE, keep.w = TRUE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) g <- GRP.grouped_df(x, call = FALSE) wsym <- l1orn(as.character(substitute(w)), NULL) nam <- attr(x, "names") gn2 <- which(nam %in% g[[5L]]) gn <- if(keep.group_vars) gn2 else NULL if(length(wsym) && length(wn <- whichv(nam, wsym))) { w <- .subset2(x, wn) if(any(gn2 == wn)) stop("Weights coincide with grouping variables!") gn2 <- c(gn2,wn) if(keep.w) gn <- c(gn,wn) } if(length(gn2)) { ax <- attributes(x) ax[["names"]] <- c(nam[gn], nam[-gn2]) # first term is removed if !length(gn) res <- .Call(Cpp_BWl, .subset(x, -gn2), g[[1L]],g[[2L]],g[[3L]],w,na.rm,1,0,TRUE,fill) if(length(gn)) return(setAttributes(c(.subset(x, gn), res), ax)) else return(setAttributes(res, ax)) } .Call(Cpp_BWl,x,g[[1L]],g[[2L]],g[[3L]],w,na.rm,1,0,TRUE,fill) } # Between Operator B <- function(x, ...) UseMethod("B") # , x B.default <- function(x, g = NULL, w = NULL, na.rm = TRUE, fill = FALSE, ...) { if(is.matrix(x) && !inherits(x, "matrix")) return(B.matrix(x, g, w, na.rm, fill, ...)) fbetween.default(x, g, w, na.rm, fill, ...) } B.pseries <- function(x, effect = 1L, w = NULL, na.rm = TRUE, fill = FALSE, ...) fbetween.pseries(x, effect, w, na.rm, fill, ...) B.matrix <- function(x, g = NULL, w = NULL, na.rm = TRUE, fill = FALSE, stub = "B.", ...) add_stub(fbetween.matrix(x, g, w, na.rm, fill, ...), stub) B.grouped_df <- function(x, w = NULL, na.rm = TRUE, fill = FALSE, stub = "B.", keep.group_vars = TRUE, keep.w = TRUE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) g <- GRP.grouped_df(x, call = FALSE) wsym <- l1orn(as.character(substitute(w)), NULL) nam <- attr(x, "names") gn2 <- which(nam %in% g[[5L]]) gn <- if(keep.group_vars) gn2 else NULL if(length(wsym) && length(wn <- whichv(nam, wsym))) { w <- .subset2(x, wn) if(any(gn2 == wn)) stop("Weights coincide with grouping variables!") gn2 <- c(gn2,wn) if(keep.w) gn <- c(gn, wn) } if(length(gn2)) { ax <- attributes(x) ax[["names"]] <- c(nam[gn], if(is.character(stub)) paste0(stub, nam[-gn2]) else nam[-gn2]) res <- .Call(Cpp_BWl, .subset(x, -gn2), g[[1L]],g[[2L]],g[[3L]],w,na.rm,1,0,TRUE,fill) if(length(gn)) return(setAttributes(c(.subset(x, gn), res), ax)) else return(setAttributes(res, ax)) } add_stub(.Call(Cpp_BWl,x,g[[1L]],g[[2L]],g[[3L]],w,na.rm,1,0,TRUE,fill), stub) } B.pdata.frame <- function(x, effect = 1L, w = NULL, cols = is.numeric, na.rm = TRUE, fill = FALSE, stub = "B.", keep.ids = TRUE, keep.w = TRUE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) ax <- attributes(x) class(x) <- NULL nam <- names(x) g <- if(length(effect) == 1L) .subset2(getpix(ax[["index"]]), effect) else finteraction(.subset(getpix(ax[["index"]]), effect)) if(keep.ids) { gn <- which(nam %in% attr(getpix(ax[["index"]]), "names")) if(length(gn) && is.null(cols)) cols <- seq_along(x)[-gn] } else gn <- NULL if(length(cols)) cols <- cols2int(cols, x, nam) if(is.call(w)) { wn <- ckmatch(all.vars(w), nam) w <- x[[wn]] cols <- if(is.null(cols)) seq_along(x)[-wn] else cols[cols != wn] if(keep.w) gn <- c(gn, wn) } if(length(gn) && length(cols)) { ax[["names"]] <- c(nam[gn], if(is.character(stub)) paste0(stub, nam[cols]) else nam[cols]) return(setAttributes(c(x[gn], .Call(Cpp_BWl,x[cols],fnlevels(g),g,NULL,w,na.rm,1,0,TRUE,fill)), ax)) } else if(!length(gn)) { ax[["names"]] <- if(is.character(stub)) paste0(stub, nam[cols]) else nam[cols] return(setAttributes(.Call(Cpp_BWl,x[cols],fnlevels(g),g,NULL,w,na.rm,1,0,TRUE,fill), ax)) } else if(is.character(stub)) { ax[["names"]] <- paste0(stub, nam) return(setAttributes(.Call(Cpp_BWl,x,fnlevels(g),g,NULL,w,na.rm,1,0,TRUE,fill), ax)) } else return(.Call(Cpp_BWl,`oldClass<-`(x, ax[["class"]]),fnlevels(g),g,NULL,w,na.rm,1,0,TRUE,fill)) } B.data.frame <- function(x, by = NULL, w = NULL, cols = is.numeric, na.rm = TRUE, fill = FALSE, stub = "B.", keep.by = TRUE, keep.w = TRUE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) if(is.call(by) || is.call(w)) { ax <- attributes(x) class(x) <- NULL nam <- names(x) if(is.call(by)) { if(length(by) == 3L) { cols <- ckmatch(all.vars(by[[2L]]), nam) gn <- ckmatch(all.vars(by[[3L]]), nam) } else { gn <- ckmatch(all.vars(by), nam) cols <- if(is.null(cols)) seq_along(x)[-gn] else cols2int(cols, x, nam) } by <- G_guo(if(length(gn) == 1L) x[[gn]] else x[gn]) if(!keep.by) gn <- NULL } else { gn <- NULL if(length(cols)) cols <- cols2int(cols, x, nam) by <- if(is.null(by)) list(0L, 0L, NULL) else G_guo(by) } if(is.call(w)) { wn <- ckmatch(all.vars(w), nam) w <- x[[wn]] cols <- if(is.null(cols)) seq_along(x)[-wn] else cols[cols != wn] if(keep.w) gn <- c(gn, wn) } if(length(gn)) { ax[["names"]] <- c(nam[gn], if(is.character(stub)) paste0(stub, nam[cols]) else nam[cols]) return(setAttributes(c(x[gn], .Call(Cpp_BWl,x[cols],by[[1L]],by[[2L]],by[[3L]],w,na.rm,1,0,TRUE,fill)), ax)) } ax[["names"]] <- if(is.character(stub)) paste0(stub, nam[cols]) else nam[cols] return(setAttributes(.Call(Cpp_BWl,x[cols],by[[1L]],by[[2L]],by[[3L]],w,na.rm,1,0,TRUE,fill), ax)) } else if(length(cols)) { # Necessary, else attributes are dropped by list-subsetting ! ax <- attributes(x) class(x) <- NULL x <- x[cols2int(cols, x, names(x), FALSE)] ax[["names"]] <- names(x) setattributes(x, ax) } if(is.character(stub)) attr(x, "names") <- paste0(stub, attr(x, "names")) if(is.null(by)) return(.Call(Cpp_BWl,x,0L,0L,NULL,w,na.rm,1,0,TRUE,fill)) by <- G_guo(by) .Call(Cpp_BWl,x,by[[1L]],by[[2L]],by[[3L]],w,na.rm,1,0,TRUE,fill) } B.list <- function(x, by = NULL, w = NULL, cols = is.numeric, na.rm = TRUE, fill = FALSE, stub = "B.", keep.by = TRUE, keep.w = TRUE, ...) B.data.frame(x, by, w, cols, na.rm, fill, stub, keep.by, keep.w, ...) collapse/R/fFtest.R0000644000176200001440000001045614167331346013651 0ustar liggesusers getdf <- function(x) { if(is.atomic(x)) if(is.factor(x)) return(fnlevels(x)-1L) else return(1L) bsum(vapply(unattrib(x), function(i) if(is.factor(i)) fnlevels(i)-1L else 1L, 1L)) } fFtest <- function(y, exc, X = NULL, w = NULL, full.df = TRUE, ...) { if(!is.numeric(y)) stop("y needs to be a numeric vector") if(!is.null(X)) { Xn <- fNCOL(X) atl <- is.atomic(X) && is.numeric(X) && is.atomic(exc) && is.numeric(exc) if(length(w)) { if(atl) { cc <- which(complete.cases(w, y, X, exc)) if(length(cc) < length(w)) { data <- cbind(y, X, exc)[cc, , drop = FALSE] w <- w[cc] } } else { data <- na_omit(qDF(c(list(w = w), list(y = y), qDF(X), qDF(exc)))) w <- .subset2(data, 1L) data[[1L]] <- NULL } } else { data <- if(atl) na_omit(cbind(y, X, exc)) else na_omit(qDF(c(list(y = y), qDF(X), qDF(exc)))) } if(full.df && !atl && any(fc <- .Call(C_vtypes, data, 2L))) { # vapply(unattrib(data), is.factor, TRUE) cld <- oldClass(data) oldClass(data) <- NULL data[fc] <- lapply(data[fc], fdroplevels.factor) df <- vapply(unattrib(data), function(i) if(is.factor(i)) fnlevels(i)-1L else 1L, 1L) # getdf(data) k <- bsum(df) # 1 for intercept added with y p <- bsum(df[(Xn+2L):length(df)]) y <- data[[1L]] oldClass(data) <- cld } else { p <- fNCOL(exc) if(atl) { k <- ncol(data) # 1 for intercept added with y y <- data[, 1L] } else { k <- length(unclass(data)) # 1 for intercept added with y y <- .subset2(data, 1L) } } kr <- k-p-1L vy <- fvar.default(y, w = w) if(atl) { n <- nrow(data) r2f <- 1 - fvar.default(fhdwithin.default(y, data[, -1L], w, na.rm = FALSE, ...), w = w)/vy r2r <- 1 - fvar.default(fhdwithin.default(y, data[, 2:(Xn+1L)], w, na.rm = FALSE, ...), w = w)/vy } else { n <- fnrow2(data) r2f <- 1 - fvar.default(fhdwithin.default(y, fcolsubset(data, -1L), w, na.rm = FALSE, ...), w = w)/vy r2r <- 1 - fvar.default(fhdwithin.default(y, fcolsubset(data, 2:(Xn+1L)), w, na.rm = FALSE, ...), w = w)/vy } ndff <- k-1L ddff <- n-k Fstatf <- r2f/ndff * ddff/(1-r2f) pf <- pf(Fstatf, ndff, ddff, lower.tail = FALSE) ddfr <- n-kr-1L Fstatr <- r2r/kr * ddfr/(1-r2r) pr <- pf(Fstatr, kr, ddfr, lower.tail = FALSE) Fstate <- (r2f - r2r)/p * ddff/(1-r2f) pe <- pf(Fstate, p, ddff, lower.tail = FALSE) res <- matrix(c(r2f, ndff, ddff, Fstatf, pf, r2r, kr, ddfr, Fstatr, pr, r2f-r2r, p, ddff, Fstate, pe), nrow = 3L, ncol = 5L, byrow = TRUE, dimnames = list(c("Full Model","Restricted Model","Exclusion Rest."), c("R-Sq.","DF1","DF2","F-Stat.","P-Value"))) oldClass(res) <- c("fFtest","matrix") } else { u <- fhdwithin.default(y, exc, w, na.rm = TRUE, ...) # Residuals miss <- attr(u, "na.rm") if(!is.null(miss)) w <- w[-miss] if(full.df && length(miss) && !is.atomic(exc) && !is.numeric(exc)) { p <- if(is.factor(exc)) fnlevels(exc[-miss, drop = TRUE])-1L else if(any(.Call(C_vtypes, exc, 2L))) # vapply(unattrib(exc), is.factor, TRUE) getdf(fdroplevels.data.frame(ss(exc, -miss))) else length(unclass(exc)) } else if(full.df) { p <- if(is.factor(exc) || (is.list(exc) && any(.Call(C_vtypes, exc, 2L)))) getdf(fdroplevels(exc)) else fNCOL(exc) # vapply(unattrib(exc), is.factor, TRUE) } else p <- fNCOL(exc) n <- length(u) r2 <- 1 - fvar.default(u, w = w)/fvar.default(if(is.null(miss)) y else y[-miss], w = w) # R-Squared ddf <- n-p-1L Fstat <- r2/p * ddf/(1-r2) # F statistic for the model (the constant goes unrestricted) Pv <- pf(Fstat, p, ddf, lower.tail = FALSE) # P-value corresponding to the F statistic res <- c(`R-Sq.` = r2, `DF1` = p, `DF2` = ddf, `F-Stat.` = Fstat, `P-value` = Pv) oldClass(res) <- "fFtest" } res } print.fFtest <- function(x, digits = 3, ...) { xx <- unclass(format(round(x, digits))) xpos <- x >= 1 xx[xpos] <- sub(paste0(c(".", rep("0",digits)), collapse = ""), "", xx[xpos]) # Problem: Deletes .00 also.. print.default(xx, quote = FALSE, right = TRUE, ...) } collapse/R/GRP.R0000644000176200001440000010760414201327077013043 0ustar liggesusers# Cuniqlengths <- data.table:::Cuniqlengths # Cfrank <- data.table:::Cfrank # forderv <- data.table:::forderv GRP <- function(X, ...) UseMethod("GRP") # , X radixorder <- function(..., na.last = TRUE, decreasing = FALSE, starts = FALSE, group.sizes = FALSE, sort = TRUE) { z <- pairlist(...) decreasing <- rep_len(as.logical(decreasing), length(z)) .Call(C_radixsort, na.last, decreasing, starts, group.sizes, sort, z) } radixorderv <- function(x, na.last = TRUE, decreasing = FALSE, starts = FALSE, group.sizes = FALSE, sort = TRUE) { z <- if(is.atomic(x)) pairlist(x) else as.pairlist(unclass(x)) decreasing <- rep_len(as.logical(decreasing), length(z)) .Call(C_radixsort, na.last, decreasing, starts, group.sizes, sort, z) } switchGRP <- function(x, na.last = TRUE, decreasing = FALSE, starts = FALSE, group.sizes = FALSE, sort = TRUE, use.group = FALSE) { if(use.group) return(.Call(C_group, x, starts, group.sizes)) z <- if(is.atomic(x)) pairlist(x) else as.pairlist(unclass(x)) decreasing <- rep_len(as.logical(decreasing), length(z)) .Call(C_radixsort, na.last, decreasing, starts, group.sizes, sort, z) } # Added... could also do in GRP.default... but this is better, no match.call etc... match.call takes 4 microseconds. could do both ?? think about possible applications... GRP.GRP <- function(X, ...) X GRP.default <- function(X, by = NULL, sort = TRUE, decreasing = FALSE, na.last = TRUE, return.groups = TRUE, return.order = sort, method = "auto", call = TRUE, ...) { # , gs = TRUE # o # if(!missing(...)) { # args <- list(...) # namarg <- names(args) # if(any(namarg == "order")) { # decreasing <- args[["order"]] == 1L # ... == 1L # warning("'order' has been replaced with 'decreasing' and now takes logical arguments. 'order' can still be used but may be removed at some point.") # # if(length(args) > 1L && !(length(args) == 2L && any(namarg == "group.sizes"))) # # unused_arg_action(match.call(), ...) # } # else if(length(args) != 1L || !any(namarg == "group.sizes")) # # unused_arg_action(match.call(), ...) # } use.group <- switch(method, auto = !sort, hash = TRUE, radix = FALSE, stop("method needs to be 'auto', 'hash' or 'radix'.")) if(is.na(na.last)) stop("here na.last needs to be TRUE or FALSE, otherwise the GRP object does not match the data dimensions.") if(is.list(X)) { if(inherits(X, "GRP")) return(X) # keep ?? if(is.null(by)) { by <- seq_along(unclass(X)) namby <- attr(X, "names") if(is.null(namby)) attr(X, "names") <- namby <- paste0("Group.", by) o <- switchGRP(X, na.last, decreasing, return.groups || !use.group, TRUE, sort, use.group) } else { if(is.call(by)) { namby <- all.vars(by, unique = FALSE) by <- ckmatch(namby, attr(X, "names")) } else if(is.character(by)) { namby <- by by <- ckmatch(by, attr(X, "names")) } else if(is.numeric(by)) { by <- as.integer(by) namby <- attr(X, "names")[by] if(is.null(namby)) { namby <- paste0("Group.", seq_along(by)) attr(X, "names") <- paste0("Group.", seq_along(unclass(X))) # best ? } } else stop("by needs to be either a one-sided formula, character column names or column indices!") o <- switchGRP(.subset(X, by), na.last, decreasing, return.groups || !use.group, TRUE, sort, use.group) } } else { if(length(by)) stop("by can only be used to subset list / data.frame columns") namby <- l1orlst(as.character(substitute(X))) # paste(all.vars(call), collapse = ".") # good in all circumstances ? o <- switchGRP(X, na.last, decreasing, return.groups || !use.group, TRUE, sort, use.group) } st <- attr(o, "starts") gs <- attr(o, "group.sizes") sorted <- if(use.group) NA else attr(o, "sorted") if(return.groups) { # if unit groups, don't subset rows... if(length(gs) == length(o) && (use.group || sorted)) { groups <- if(is.list(X)) .Call(C_subsetCols, X, by, FALSE) else `names<-`(list(X), namby) } else { ust <- if(use.group || sorted) st else .Call(C_subsetVector, o, st, FALSE) # o[st] groups <- if(is.list(X)) .Call(C_subsetDT, X, ust, by, FALSE) else `names<-`(list(.Call(C_subsetVector, X, ust, FALSE)), namby) # subsetVector preserves attributes (such as "label") } } else groups <- NULL return(`oldClass<-`(list(N.groups = length(gs), group.id = if(use.group) `attributes<-`(o, NULL) else .Call(C_frankds, o, st, gs, sorted), group.sizes = gs, groups = groups, group.vars = namby, ordered = c(GRP.sort = sort, initially.ordered = sorted), order = if(!return.order) NULL else if(use.group) `attr<-`(integer(0L), "starts", st) else .Call(C_setAttributes, o, attributes(o)[-2L]), # `attributes<-`(o, attributes(o)[-2L]) This does a shallow copy on newer R versions # `attr<-`(o, "group.sizes", NULL): This deep-copies it.. # starts = ust, Does not need to be computed by group() # maxgrpn = attr(o, "maxgrpn"), call = if(call) match.call() else NULL), "GRP")) } is_GRP <- function(x) inherits(x, "GRP") is.GRP <- function(x) { message("Note that 'is.GRP' was renamed to 'is_GRP'. It will not be removed anytime soon, but please use updated function names in new code, see help('collapse-renamed')") inherits(x, "GRP") } length.GRP <- function(x) length(x[[2L]]) GRPnames <- function(x, force.char = TRUE) { # , ... groups <- x[[4L]] if(is.null(groups)) return(NULL) if(length(unclass(groups)) > 1L) return(do.call(paste, c(groups, list(sep = ".")))) if(force.char) tochar(.subset2(groups, 1L)) else .subset2(groups, 1L) # paste0(groups[[1L]]) prints "NA" but is slow, if assign with rownames<-, cannot have duplicate row names. But, attr<- "row.names" is fine !! } GRPN <- function(x, expand = TRUE, ...) { g <- GRP(x, sort = FALSE, return.groups = FALSE, return.order = FALSE, call = FALSE, ...) if(expand) .Call(C_subsetVector, g$group.sizes, g$group.id, FALSE) else g$group.sizes } # group_names.GRP <- function(x, force.char = TRUE) { # .Deprecated("GRPnames") # GRPnames(x, force.char) # } print.GRP <- function(x, n = 6, ...) { # if(!missing(...)) unused_arg_action(match.call(), ...) ord <- x[[6L]] cat(paste0("collapse grouping object of length ", length(x[[2L]]), " with ", x[[1L]], if(anyNA(ord)) "" else if(any(ord)) " ordered" else " unordered", " groups"), fill = TRUE) cat("\nCall: ", paste0(deparse(x[["call"]]), if(is.na(ord[2L])) "" else if(ord[2L]) ", X is ordered" else ", X is unordered"), "\n\n", sep = "") cat("Distribution of group sizes: ", fill = TRUE) print.summaryDefault(summary.default(x[[3L]]), ...) if(!is.null(x[[4L]])) { ug <- unattrib(x[[4L]]) cat("\nGroups with sizes: ", fill = TRUE) if(length(ug) == 1L) { ug <- ug[[1L]] if(length(ug) > 2L*n) { ind <- seq.int(x[[1L]]-n+1L, x[[1L]]) print.default(setNames(x[[3L]][1:n], ug[1:n]), ...) cat(" ---", fill = TRUE) print.default(setNames(x[[3L]][ind], ug[ind]), ...) } else print.default(setNames(x[[3L]], ug), ...) } else { if(length(ug[[1L]]) > 2L*n) { ind <- seq.int(x[[1L]]-n+1L, x[[1L]]) print.default(setNames(x[[3L]][1:n], do.call(paste, c(lapply(ug, function(x) x[1:n]), list(sep = ".")))), ...) cat(" ---", fill = TRUE) print.default(setNames(x[[3L]][ind], do.call(paste, c(lapply(ug, function(x) x[ind]), list(sep = ".")))), ...) } else print.default(setNames(x[[3L]], do.call(paste, c(ug, list(sep = ".")))), ...) } } } plot.GRP <- function(x, breaks = "auto", type = "s", horizontal = FALSE, ...) { # if(!missing(...)) unused_arg_action(match.call(), ...) oldpar <- par(mfrow = if(horizontal) 1:2 else 2:1, mar = c(3.9,4.1,2.1,1), mgp = c(2.5,1,0)) on.exit(par(oldpar)) if(breaks == "auto") { ugs <- length(funique(x[[3L]])) breaks <- if(ugs > 80) 80 else ugs } plot(seq_len(x[[1L]]), x[[3L]], type = type, xlab = "Group id", ylab = "Group Size", main = paste0("Sizes of ", x[[1L]], if(anyNA(x[[6L]])) "" else if(any(x[[6L]])) " Ordered" else " Unordered", " Groups"), frame.plot = FALSE, ...) # grid() if(breaks == 1L) plot(x[[3L]][1L], x[[1L]], type = "h", ylab = "Frequency", xlab = "Group Size", main = "Histogram of Group Sizes", frame.plot = FALSE, ...) else hist(x[[3L]], breaks, xlab = "Group Size", main = "Histogram of Group Sizes", ...) } as_factor_GRP <- function(x, ordered = FALSE) { # , ... # if(is.factor(x)) return(x) # if(!is_GRP(x)) stop("x must be a 'GRP' object") f <- x[[2L]] gr <- unclass(x[[4L]]) if(is.null(gr)) { attr(f, "levels") <- as.character(seq_len(x[[1L]])) } else { if(length(gr) == 1L) { attr(f, "levels") <- tochar(gr[[1L]]) # or formatC ? } else { attr(f, "levels") <- do.call(paste, c(gr, list(sep = "."))) } } oldClass(f) <- if(ordered) c("ordered","factor","na.included") else c("factor","na.included") # previously if any(x[[6L]]) f } as.factor_GRP <- function(x, ordered = FALSE) { message("Note that 'as.factor_GRP' was renamed to 'as_factor_GRP'. It will not be removed anytime soon, but please use updated function names in new code, see help('collapse-renamed')") as_factor_GRP(x, ordered) } finteraction <- function(..., ordered = FALSE, sort = TRUE, method = "auto") { # does it drop levels ? -> Yes ! if(...length() == 1L && is.list(...)) return(as_factor_GRP(GRP.default(..., sort = sort, method = method, call = FALSE), ordered)) as_factor_GRP(GRP.default(list(...), sort = sort, method = method, call = FALSE), ordered) } GRP.qG <- function(X, ..., group.sizes = TRUE, return.groups = TRUE, call = TRUE) { # if(!missing(...)) unused_arg_action(match.call(), ...) gvars <- l1orlst(as.character(substitute(X))) # paste(all.vars(call), collapse = ".") # good in all circumstances ? ng <- attr(X, "N.groups") grl <- return.groups && length(groups <- attr(X, "groups")) if(!inherits(X, "na.included")) if(anyNA(unclass(X))) { ng <- ng + 1L X[is.na(X)] <- ng if(grl) groups <- c(groups, NA) } ordered <- if(is.ordered(X)) c(TRUE, NA) else c(FALSE, NA) attributes(X) <- NULL return(`oldClass<-`(list(N.groups = ng, group.id = X, group.sizes = if(group.sizes) tabulate(X, ng) else NULL, # .Internal(tabulate(X, ng)) groups = if(grl) `names<-`(list(groups), gvars) else NULL, group.vars = gvars, ordered = ordered, order = NULL, # starts = NULL, maxgrpn = NULL, call = if(call) match.call() else NULL), "GRP")) } GRP.factor <- function(X, ..., group.sizes = TRUE, drop = FALSE, return.groups = TRUE, call = TRUE) { # if(!missing(...)) unused_arg_action(match.call(), ...) nam <- l1orlst(as.character(substitute(X))) # paste(all.vars(call), collapse = ".") # good in all circumstances ? if(!inherits(X, "na.included")) X <- addNA2(X) if(drop) X <- .Call(Cpp_fdroplevels, X, FALSE) lev <- attr(X, "levels") nl <- length(lev) ordered <- if(is.ordered(X)) c(TRUE, NA) else c(FALSE, NA) attributes(X) <- NULL return(`oldClass<-`(list(N.groups = nl, group.id = X, group.sizes = if(group.sizes) tabulate(X, nl) else NULL, # .Internal(tabulate(X, nl)) groups = if(return.groups) `names<-`(list(lev), nam) else NULL, group.vars = nam, ordered = ordered, order = NULL, # starts = NULL, maxgrpn = NULL, call = if(call) match.call() else NULL), "GRP")) } GRP.pseries <- function(X, effect = 1L, ..., group.sizes = TRUE, return.groups = TRUE, call = TRUE) { g <- unclass(getpix(attr(X, "index"))) # index cannot be atomic since plm always adds a time variable ! if(length(effect) > 1L) return(GRP.default(g[effect], ...)) # if(!missing(...)) unused_arg_action(match.call(), ...) # if(length(g) > 2L) { # mlg <- -length(g) # nam <- paste(names(g)[mlg], collapse = ".") # g <- interaction(g[mlg], drop = TRUE) # } else { nam <- if(is.character(effect)) effect else names(g)[effect] g <- g[[effect]] # Fastest way to do this ? # } lev <- attr(g, "levels") nl <- length(lev) ordered <- if(is.ordered(g)) c(TRUE, NA) else c(FALSE, NA) attributes(g) <- NULL return(`oldClass<-`(list(N.groups = nl, group.id = g, group.sizes = if(group.sizes) tabulate(g, nl) else NULL, # .Internal(tabulate(g, nl)) groups = if(return.groups) `names<-`(list(lev), nam) else NULL, group.vars = nam, ordered = ordered, order = NULL, # starts = NULL, maxgrpn = NULL, call = if(call) match.call() else NULL), "GRP")) } GRP.pdata.frame <- function(X, effect = 1L, ..., group.sizes = TRUE, return.groups = TRUE, call = TRUE) GRP.pseries(X, effect, ..., group.sizes = group.sizes, return.groups = return.groups, call = call) fgroup_by <- function(.X, ..., sort = TRUE, decreasing = FALSE, na.last = TRUE, return.order = sort, method = "auto") { # e <- substitute(list(...)) # faster but does not preserve attributes of unique groups ! clx <- oldClass(.X) oldClass(.X) <- NULL m <- match(c("GRP_df", "grouped_df", "data.frame"), clx, nomatch = 0L) dots <- substitute(list(...)) vars <- all.vars(dots, unique = FALSE) # In case sequences of columns are passed... if(length(vars)+1L != length(dots) && any(all.names(dots) == ":")) { # Note that fgroup_by(mtcars, bla = round(mpg / cyl), vs:am) only groups by vs, and am. fselect(mtcars, bla = round(mpg / cyl), vs:am) also does the wrong thing. nl <- `names<-`(as.vector(seq_along(.X), "list"), names(.X)) vars <- eval(substitute(c(...)), nl, parent.frame()) e <- .X[vars] # This allows renaming... if(length(nam_vars <- names(vars))) { nonmiss <- nzchar(nam_vars) names(e)[nonmiss] <- nam_vars[nonmiss] } # e <- fselect(if(m[2L]) fungroup(.X) else .X, ...) } else { e <- eval(dots, .X, parent.frame()) name <- names(e) # If something else than NSE cols is supplied if(length(e) == 1L && length(e[[1L]]) != length(.X[[1L]]) && is.null(name)) { e <- .X[cols2int(e[[1L]], .X, names(.X), FALSE)] } else { if(length(name)) { # fgroup_by(mtcars, bla = round(mpg / cyl), vs, am) nonmiss <- nzchar(name) # -> using as.character(dots[-1L]) instead of vars if(!all(nonmiss)) names(e) <- `[<-`(as.character(dots[-1L]), nonmiss, value = name[nonmiss]) } else names(e) <- vars } } attr(.X, "groups") <- GRP.default(e, NULL, sort, decreasing, na.last, TRUE, return.order, method, FALSE) # if(any(clx == "sf")) oldClass(.X) <- clx[clx != "sf"] # attr(.X, "groups") <- GRP.default(fselect(if(m[2L]) fungroup(.X) else .X, ...), NULL, sort, decreasing, na.last, TRUE, return.order, method, FALSE) # Needed: wlddev %>% fgroup_by(country) gives error if dplyr is loaded. Also sf objects etc.. # .rows needs to be list(), NULL won't work !! Note: attaching a data.frame class calls data frame methods, even if "list" in front! -> Need GRP.grouped_df to restore object ! # attr(.X, "groups") <- `oldClass<-`(c(g, list(.rows = list())), c("GRP", "data.frame")) # `names<-`(eval(e, .X, parent.frame()), all.vars(e)) oldClass(.X) <- c("GRP_df", if(length(mp <- m[m != 0L])) clx[-mp] else clx, "grouped_df", if(m[3L]) "data.frame") # clx[-m] doesn't work if clx is only "data.table" for example # simplest, but .X is coerced to data.frame. Through the above solution it can be a list and only receive the 'grouped_df' class # add_cl <- c("grouped_df", "data.frame") # oldClass(.X) <- c(fsetdiff(oldClass(.X), add_cl), add_cl) if(any(clx == "data.table")) return(alc(.X)) .X } gby <- fgroup_by print.GRP_df <- function(x, ...) { print(fungroup(x)) # better !! (the method could still print groups attribute etc. ) And can also get rid of .rows() in fgroup_by and other fuzz.. # but better keep for now, other functions in dplyr might check this and only preserve attributes if they exist. -> Nah. select(UGA_sf, addr_cname) doesn't work anyway.. # NextMethod() g <- attr(x, "groups") if(is_GRP(g)) { # Issue Patrice flagged ! # oldClass(g) <- NULL # could get rid of this if get rid of "data.frame" class. if(length(g[[3L]])) { su <- unclass(qsu.default(g[[3L]], stable.algo = FALSE)) stats <- if(su[4L] == su[5L]) paste0(" [", g[[1L]], " | ", round(su[2L]), " (", round(su[3L], 1L), ")]") else paste0(" [", g[[1L]], " | ", round(su[2L]), " (", round(su[3L], 1L), ") ", su[4L], "-", su[5L], "]") } else stats <- paste0(" [", g[[1L]], " | ", round(length(g[[2L]]) / g[[1L]]), "]") # Groups: # if(any(g[[6L]])) "ordered groups" else "unordered groups", -> ordered 99% of times... cat("\nGrouped by: ", paste(g[[5L]], collapse = ", "), stats, "\n") if(inherits(x, "pdata.frame")) message("\nNote: 'pdata.frame' methods for flag, fdiff, fgrowth, fbetween, fwithin and varying\n take precedence over the 'grouped_df' methods for these functions.") } } print.invisible <- function(x, ...) cat("") # Still solve this properly for data.table... `[.GRP_df` <- function(x, ...) { clx <- oldClass(x) if(any(clx == "data.table")) { res <- NextMethod() if(any(clx == "invisible")) { # for chaining... clx <- clx[clx != "invisible"] oldClass(res) <- clx # in case of early return (reduced rows)... } if(any(grepl(":=", .c(...)))) { eval.parent(substitute(x <- res)) oldClass(res) <- c("invisible", clx) # return(invisible(res)) -> doesn't work here for some reason } else { if(!(is.list(res) && fnrow2(res) == fnrow2(x))) return(fungroup(res)) if(is.null(attr(res, "groups"))) attr(res, "groups") <- attr(x, "groups") oldClass(res) <- clx } } else { res <- `[`(fungroup(x), ...) # does not respect data.table properties, but better for sf data frame and others which check validity of "groups" attribute if(!(is.list(res) && fnrow2(res) == fnrow2(x))) return(res) attr(res, "groups") <- attr(x, "groups") oldClass(res) <- clx } res } # missing doesn't work, its invidible return... # `[.GRP_df` <- function(x, ...) { # tstop <- function(x) if(missing(x)) NULL else x # res <- tstop(NextMethod()) # better than above (problems with data.table method, but do further checks...) # if(is.null(res)) return(NULL) # if(!(is.list(res) && fnrow2(res) == fnrow2(x))) return(fungroup(res)) # if(is.null(g <- attr(res, "groups"))) attr(res, "groups") <- g # oldClass(res) <- oldClass(x) # return(res) # } # also needed to sort out errors with dplyr ... `[[.GRP_df` <- function(x, ...) UseMethod("[[", fungroup(x)) # function(x, ..., exact = TRUE) .subset2(x, ..., exact = exact) `[<-.GRP_df` <- function(x, ..., value) UseMethod("[<-", fungroup(x)) `[[<-.GRP_df` <- function(x, ..., value) UseMethod("[[<-", fungroup(x)) # Produce errors... # print_GRP_df_core <- function(x) { # g <- attr(x, "groups") # cat("\nGrouped by: ", paste(g[[5L]], collapse = ", "), # # if(any(g[[6L]])) "ordered groups" else "unordered groups", -> ordered 99% of times... # paste0(" [", g[[1L]], " | ", round(length(g[[2L]]) / g[[1L]]), " (", round(fsd.default(g[[3L]]), 1), ")]")) # if(inherits(x, "pdata.frame")) # message("\nNote: 'pdata.frame' methods for flag, fdiff, fgrowth, fbetween, fwithin and varying\n take precedence over the 'grouped_df' methods for these functions.") # } # # head.GRP_df <- function(x, ...) { # NextMethod() # print_GRP_df_core(x) # } # # tail.GRP_df <- function(x, ...) { # NextMethod() # print_GRP_df_core(x) # } # "[117 ordered groups | mean(N): 64 | sd(N): 29.7]" # "[117 ordered groups | Avg. N: 64 (SD: 29.7)]" fungroup <- function(X, ...) { # if(!missing(...)) unused_arg_action(match.call(), ...) clx <- oldClass(X) attr(X, "groups") <- NULL oldClass(X) <- fsetdiff(clx, c("GRP_df", "grouped_df")) # clx[clx != "grouped_df"] if(any(clx == "data.table")) return(alc(X)) X } # collapse 1.3.2 versions: # fgroup_by <- function(X, ..., sort = TRUE, decreasing = FALSE, na.last = TRUE, return.order = FALSE) { # e <- substitute(list(...)) # faster but does not preserve attributes of unique groups !! # clx <- oldClass(X) # attr(X, "groups") <- GRP.default(fselect(X, ...), NULL, sort, decreasing, na.last, TRUE, return.order, FALSE) # `names<-`(eval(e, X, parent.frame()), all.vars(e)) # attr(X, "was.tibble") <- any(clx == "tbl_df") # add_cl <- if(any(clx == "data.table")) c("data.table", "tbl_df", "tbl", "grouped_df") else c("tbl_df", "tbl", "grouped_df") # oldClass(X) <- c(add_cl, fsetdiff(clx, add_cl)) # necesssary to avoid printing errors... (i.e. wrong group object etc...) # X # } # # fungroup <- function(X, untibble = isFALSE(attr(X, "was.tibble"))) { # clx <- oldClass(X) # attr(X, "groups") <- NULL # if(untibble) { # oldClass(X) <- fsetdiff(clx, c("tbl_df", "tbl", "grouped_df")) # attr(X, "was.tibble") <- NULL # } else oldClass(X) <- clx[clx != "grouped_df"] # X # } condCopyAttrib <- function(x, d) { if(is.object(x)) return(x) rn <- c(NA_integer_, -length(x[[1L]])) cld <- oldClass(d) oldClass(d) <- NULL attr(d, "groups") <- NULL attr(d, "row.names") <- NULL x <- copyMostAttributes(x, d) attr(x, "row.names") <- rn oldClass(x) <- fsetdiff(cld, c("GRP_df", "grouped_df", "sf")) if(any(cld == "data.table")) return(alc(x)) x } fgroup_vars <- function(X, return = "data") { g <- attr(X, "groups") if(!is.list(g)) stop("attr(X, 'groups') is not a grouping object") vars <- if(is_GRP(g)) g[[5L]] else attr(g, "names")[-length(unclass(g))] switch(return, data = .Call(C_subsetCols, fungroup(X), ckmatch(vars, attr(X, "names")), TRUE), unique = if(is_GRP(g)) condCopyAttrib(g[[4L]], X) else .Call(C_subsetCols, g, -length(unclass(g)), FALSE), # what about attr(*, ".drop") ?? names = vars, indices = ckmatch(vars, attr(X, "names")), named_indices = `names<-`(ckmatch(vars, attr(X, "names")), vars), logical = `[<-`(logical(length(unclass(X))), ckmatch(vars, attr(X, "names")), TRUE), named_logical = { nam <- attr(X, "names") `names<-`(`[<-`(logical(length(nam)), ckmatch(vars, nam), TRUE), nam) }, stop("Unknown return option!")) } GRP.grouped_df <- function(X, ..., return.groups = TRUE, call = TRUE) { # if(!missing(...)) unused_arg_action(match.call(), ...) # g <- unclass(attr(X, "groups")) g <- attr(X, "groups") if(is_GRP(g)) return(g) # return(`oldClass<-`(.subset(g, 1:8), "GRP")) # To avoid data.frame methods being called if(!is.list(g)) stop("attr(X, 'groups') is not a grouping object") oldClass(g) <- NULL lg <- length(g) gr <- g[[lg]] ng <- length(gr) gs <- vlengths(gr, FALSE) return(`oldClass<-`(list(N.groups = ng, # The C code here speeds up things a lot !! group.id = .Call(C_groups2GRP, gr, fnrow2(X), gs), # Old: rep(seq_len(ng), gs)[order(unlist(gr, FALSE, FALSE))], # .Internal(radixsort(TRUE, FALSE, FALSE, TRUE, .Internal(unlist(gr, FALSE, FALSE)))) group.sizes = gs, groups = if(return.groups) g[-lg] else NULL, # better reclass afterwards ? -> Nope, this is only used in internal codes... group.vars = names(g)[-lg], ordered = c(TRUE, NA), # Important to have NA here, otherwise wrong result in gsplit (wrong optimization) order = NULL, # starts = NULL, maxgrpn = NULL, call = if(call) match.call() else NULL), "GRP")) } is_qG <- function(x) inherits(x, "qG") is.qG <- function(x) { message("Note that 'is.qG' was renamed to 'is_qG'. It will not be removed anytime soon, but please use updated function names in new code, see help('collapse-renamed')") inherits(x, "qG") } na_rm2 <- function(x, sort) { if(sort) return(if(is.na(x[length(x)])) x[-length(x)] else x) na_rm(x) # if(anyNA(x)) x[!is.na(x)] else x # use na_rm here when speed fixed.. } Csv <- function(x, i) .Call(C_subsetVector, x, i, FALSE) # What about NA last option to radixsort ? -> Nah, vector o becomes too short... radixfact <- function(x, sort, ord, fact, naincl, keep, retgrp = FALSE) { o <- .Call(C_radixsort, TRUE, FALSE, fact || naincl || retgrp, naincl, sort, pairlist(x)) st <- attr(o, "starts") sorted <- attr(o, "sorted") f <- if(naincl) .Call(C_frankds, o, st, attr(o, "group.sizes"), sorted) else # Fastest? -> Seems so.. .Call(Cpp_groupid, x, if(sorted) NULL else o, 1L, TRUE, FALSE) if(fact) { if(keep) duplAttributes(f, x) else attributes(f) <- NULL rawlev <- Csv(x, if(sorted) st else Csv(o, st)) attr(f, "levels") <- unattrib(tochar(if(naincl) rawlev else na_rm2(rawlev, sort))) oldClass(f) <- c(if(ord) "ordered", "factor", if(naincl) "na.included") } else { if(naincl) attr(f, "N.groups") <- length(st) # the order is important, this before retgrp !! if(retgrp) { rawlev <- Csv(x, if(sorted) st else Csv(o, st)) attr(f, "groups") <- if(naincl) rawlev else na_rm2(rawlev, sort) } oldClass(f) <- c(if(ord) "ordered", "qG", if(naincl) "na.included") } f } # TODO: Why is numeric to character conversion so slow?... groupfact <- function(x, ord, fact, naincl, keep, retgrp = FALSE) { g <- .Call(C_groupat, x, fact || retgrp, naincl) if(fact) { lev <- unattrib(tochar(Csv(x, attr(g, "starts")))) if(keep) duplAttributes(g, x) attr(g, "levels") <- lev oldClass(g) <- c(if(ord) "ordered", "factor", if(naincl) "na.included") } else { if(retgrp) attr(g, "groups") <- Csv(x, attr(g, "starts")) oldClass(g) <- c(if(ord) "ordered", "qG", if(naincl) "na.included") } attr(g, "starts") <- NULL g } hashfact <- function(x, sort, ord, fact, naincl, keep, retgrp = FALSE) { if(sort) return(.Call(Cpp_qF, x, ord, !naincl, keep, if(fact) 1L else 2L+retgrp)) groupfact(x, ord, fact, naincl, keep, retgrp) } as_factor_qG <- function(x, ordered = FALSE, na.exclude = TRUE) { groups <- if(is.null(attr(x, "groups"))) as.character(seq_len(attr(x, "N.groups"))) else tochar(attr(x, "groups")) nainc <- inherits(x, "na.included") if(na.exclude || nainc) { clx <- c(if(ordered) "ordered", "factor", if(nainc) "na.included") # can set unordered ?? } else { if(anyNA(unclass(x))) { x[is.na(x)] <- attr(x, "N.groups") + 1L groups <- c(groups, NA_character_) # faster doing groups[length(groups)+1] <- NA? -> Nope, what you have is fastest ! } clx <- c(if(ordered) "ordered", "factor", "na.included") } return(`attributes<-`(x, list(levels = groups, class = clx))) } as.factor_qG <- function(x, ordered = FALSE, na.exclude = TRUE) { message("Note that 'as.factor_qG' was renamed to 'as_factor_qG'. It will not be removed anytime soon, but please use updated function names in new code, see help('collapse-renamed')") as_factor_qG(x, ordered, na.exclude) } qF <- function(x, ordered = FALSE, na.exclude = TRUE, sort = TRUE, drop = FALSE, keep.attr = TRUE, method = "auto") { if(is.factor(x) && sort) { if(!keep.attr && !all(names(ax <- attributes(x)) %in% c("levels", "class"))) attributes(x) <- ax[c("levels", "class")] if(na.exclude || inherits(x, "na.included")) { clx <- oldClass(x) if(ordered && !any(clx == "ordered")) oldClass(x) <- c("ordered", clx) else # can set unordered ?? if(!ordered && any(clx == "ordered")) oldClass(x) <- clx[clx != "ordered"] if(drop) return(.Call(Cpp_fdroplevels, x, !inherits(x, "na.included"))) else return(x) } x <- addNA2(x) oldClass(x) <- c(if(ordered) "ordered", "factor", "na.included") if(drop) return(.Call(Cpp_fdroplevels, x, FALSE)) else return(x) } if(is_qG(x)) return(as_factor_qG(x, ordered, na.exclude)) # && sort?? switch(method, # if((is.character(x) && !na.exclude) || (length(x) < 500 && !(is.character(x) && na.exclude))) auto = if(is.character(x) || is.logical(x) || !sort || length(x) < 500L) hashfact(x, sort, ordered, TRUE, !na.exclude, keep.attr) else # .Call(Cpp_qF, x, sort, ordered, na.exclude, keep.attr, 1L) radixfact(x, sort, ordered, TRUE, !na.exclude, keep.attr), radix = radixfact(x, sort, ordered, TRUE, !na.exclude, keep.attr), hash = hashfact(x, sort, ordered, TRUE, !na.exclude, keep.attr), # .Call(Cpp_qF, x, sort, ordered, na.exclude, keep.attr, 1L), stop("Unknown method:", method)) } # TODO: Keep if(ordered) "ordered" ? qG <- function(x, ordered = FALSE, na.exclude = TRUE, sort = TRUE, return.groups = FALSE, method = "auto") { if(inherits(x, c("factor", "qG"))) { nainc <- inherits(x, "na.included") if(na.exclude || nainc || !anyNA(unclass(x))) { newclx <- c(if(ordered) "ordered", "qG", if(nainc || !na.exclude) "na.included") if(is.factor(x)) { ax <- if(return.groups) list(N.groups = fnlevels(x), groups = attr(x, "levels"), class = newclx) else list(N.groups = fnlevels(x), class = newclx) } else { ax <- if(return.groups) list(N.groups = attr(x, "N.groups"), groups = attr(x, "groups"), class = newclx) else list(N.groups = attr(x, "N.groups"), class = newclx) } return(`attributes<-`(x, ax)) } newclx <- c(if(ordered) "ordered", "qG", "na.included") if(is.factor(x)) { lev <- attr(x, "levels") if(anyNA(lev)) ng <- length(lev) else { ng <- length(lev) + 1L if(return.groups) lev <- c(lev, NA_character_) } attributes(x) <- NULL # factor method seems faster, however cannot assign integer, must assign factor level... } else { if(return.groups && length(lev <- attr(x, "groups"))) lev <- c(lev, NA) ng <- attr(x, "N.groups") + 1L } ax <- if(return.groups) list(N.groups = ng, groups = lev, class = newclx) else list(N.groups = ng, class = newclx) x[is.na(x)] <- ng return(`attributes<-`(x, ax)) } switch(method, # if((is.character(x) && !na.exclude) || (length(x) < 500 && !(is.character(x) && na.exclude))) auto = if(is.character(x) || is.logical(x) || !sort || length(x) < 500L) hashfact(x, sort, ordered, FALSE, !na.exclude, FALSE, return.groups) else # .Call(Cpp_qF, x, sort, ordered, na.exclude, FALSE, 2L+return.groups) radixfact(x, sort, ordered, FALSE, !na.exclude, FALSE, return.groups), radix = radixfact(x, sort, ordered, FALSE, !na.exclude, FALSE, return.groups), hash = hashfact(x, sort, ordered, FALSE, !na.exclude, FALSE, return.groups), # .Call(Cpp_qF, x, sort, ordered, na.exclude, FALSE, 2L+return.groups), stop("Unknown method:", method)) } radixuniquevec <- function(x, sort, na.last = TRUE, decreasing = FALSE) { o <- .Call(C_radixsort, na.last, decreasing, TRUE, FALSE, sort, pairlist(x)) if(attr(o, "maxgrpn") == 1L && (!sort || attr(o, "sorted"))) return(x) Csv(x, if(attr(o, "sorted")) attr(o, "starts") else Csv(o, attr(o, "starts"))) } funique <- function(x, ...) UseMethod("funique") funique.default <- function(x, sort = FALSE, method = "auto", ...) { # if(!missing(...)) unused_arg_action(match.call(), ...) if(is.array(x)) stop("funique currently only supports atomic vectors and data.frames") switch(method, auto = if(sort && is.numeric(x) && length(x) > 500L) radixuniquevec(x, sort, ...) else .Call(Cpp_funique, x, sort), radix = radixuniquevec(x, sort, ...), hash = .Call(Cpp_funique, x, sort)) # , ... adding dots gives error message too strict, package default is warning.. } # could make faster still... not using colsubset but something more simple... no attributes needed... # Enable by formula use ?? by or cols ?? -> cols is clearer !! also with na_omit, by could imply by-group uniqueness check... funique.data.frame <- function(x, cols = NULL, sort = FALSE, method = "auto", ...) { # if(!missing(...)) unused_arg_action(match.call(), ...) use.group <- switch(method, auto = !sort, hash = TRUE, radix = FALSE, stop("method needs to be 'auto', 'hash' or 'radix'.")) o <- if(is.null(cols)) switchGRP(x, starts = TRUE, sort = sort, use.group = use.group, ...) else switchGRP(colsubset(x, cols), starts = TRUE, sort = sort, use.group = use.group, ...) # if(is.call(by)) .subset(x, ckmatch(attr(x, "names"), all.vars(by))) if((use.group && length(o) == attr(o, "N.groups")) || (!use.group && attr(o, "maxgrpn") == 1L && (!sort || attr(o, "sorted")))) # return(x) return(if(inherits(x, "data.table")) alc(x) else x) st <- if(use.group || attr(o, "sorted")) attr(o, "starts") else Csv(o, attr(o, "starts")) rn <- attr(x, "row.names") if(is.numeric(rn) || is.null(rn) || rn[1L] == "1") return(.Call(C_subsetDT, x, st, seq_along(unclass(x)), FALSE)) return(`attr<-`(.Call(C_subsetDT, x, st, seq_along(unclass(x)), FALSE), "row.names", rn[st])) } funique.list <- function(x, cols = NULL, sort = FALSE, method = "auto", ...) funique.data.frame(x, cols, sort, method, ...) funique.sf <- function(x, cols = NULL, sort = FALSE, method = "auto", ...) { use.group <- switch(method, auto = !sort, hash = TRUE, radix = FALSE, stop("method needs to be 'auto', 'hash' or 'radix'.")) cols <- if(is.null(cols)) whichv(attr(x, "names"), attr(x, "sf_column"), TRUE) else cols2int(cols, x, attr(x, "names"), FALSE) o <- switchGRP(.subset(x, cols), starts = TRUE, sort = sort, use.group = use.group, ...) if((use.group && length(o) == attr(o, "N.groups")) || (!use.group && attr(o, "maxgrpn") == 1L && (!sort || attr(o, "sorted")))) return(x) st <- if(use.group || attr(o, "sorted")) attr(o, "starts") else Csv(o, attr(o, "starts")) rn <- attr(x, "row.names") if(is.numeric(rn) || is.null(rn) || rn[1L] == "1") return(.Call(C_subsetDT, x, st, seq_along(unclass(x)), FALSE)) return(`attr<-`(.Call(C_subsetDT, x, st, seq_along(unclass(x)), FALSE), "row.names", rn[st])) } fdroplevels <- function(x, ...) UseMethod("fdroplevels") fdroplevels.default <- function(x, ...) { message("Trying to drop levels from an unsupported object: returning object") x } fdroplevels.factor <- function(x, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) clx <- class(x) if(!any(clx == "factor")) stop("x needs to be a factor") .Call(Cpp_fdroplevels, x, !any(clx == "na.included")) } fdroplevels.data.frame <- function(x, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) res <- duplAttributes(lapply(unattrib(x), function(y) if(is.factor(y)) .Call(Cpp_fdroplevels, y, !inherits(y, "na.included")) else y), x) if(inherits(x, "data.table")) return(alc(res)) res } fdroplevels.list <- fdroplevels.data.frame # Old R-based trial # fdroplevels.factor <- function(x, ...) { # if(!missing(...)) unused_arg_action(match.call(), ...) # lev <- attr(x, "levels") # ul <- .Call(Cpp_funique, unclass(x), TRUE) # lul <- length(ul) # if(!is.na(ul[lul])) { # NA always comes last # if(lul == length(lev)) return(x) # f <- match(unclass(x), ul) # use Rcpp match ?? # attr(f, "levels") <- lev[ul] # return(f) # } # if(lul-1L == length(lev)) return(x) # f <- match(unclass(x), ul[-lul]) # use Rcpp match ?? # attr(f, "levels") <- lev[ul[-lul]] # f # } collapse/R/fmin_fmax.R0000644000176200001440000002502514172367040014354 0ustar liggesusers # For foundational changes to this code see fsum.R !! fmin <- function(x, ...) UseMethod("fmin") # , x fmin.default <- function(x, g = NULL, TRA = NULL, na.rm = TRUE, use.g.names = TRUE, ...) { if(is.matrix(x) && !inherits(x, "matrix")) return(fmin.matrix(x, g, TRA, na.rm, use.g.names, ...)) if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(TRA)) { if(is.null(g)) return(.Call(C_fmin,x,0L,0L,na.rm)) if(is.atomic(g)) { if(use.g.names) { if(!is.nmfactor(g)) g <- qF(g, na.exclude = FALSE) lev <- attr(g, "levels") return(`names<-`(.Call(C_fmin,x,length(lev),g,na.rm), lev)) } if(is.nmfactor(g)) return(.Call(C_fmin,x,fnlevels(g),g,na.rm)) g <- qG(g, na.exclude = FALSE) return(.Call(C_fmin,x,attr(g,"N.groups"),g,na.rm)) } if(!is_GRP(g)) g <- GRP.default(g, return.groups = use.g.names, call = FALSE) if(use.g.names) return(`names<-`(.Call(C_fmin,x,g[[1L]],g[[2L]],na.rm), GRPnames(g))) return(.Call(C_fmin,x,g[[1L]],g[[2L]],na.rm)) } if(is.null(g)) return(.Call(Cpp_TRA,x,.Call(C_fmin,x,0L,0L,na.rm),0L,TtI(TRA))) g <- G_guo(g) .Call(Cpp_TRA,x,.Call(C_fmin,x,g[[1L]],g[[2L]],na.rm),g[[2L]],TtI(TRA)) } fmin.matrix <- function(x, g = NULL, TRA = NULL, na.rm = TRUE, use.g.names = TRUE, drop = TRUE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(TRA)) { if(is.null(g)) return(.Call(C_fminm,x,0L,0L,na.rm,drop)) if(is.atomic(g)) { if(use.g.names) { if(!is.nmfactor(g)) g <- qF(g, na.exclude = FALSE) lev <- attr(g, "levels") return(`dimnames<-`(.Call(C_fminm,x,length(lev),g,na.rm,FALSE), list(lev, dimnames(x)[[2L]]))) } if(is.nmfactor(g)) return(.Call(C_fminm,x,fnlevels(g),g,na.rm,FALSE)) g <- qG(g, na.exclude = FALSE) return(.Call(C_fminm,x,attr(g,"N.groups"),g,na.rm,FALSE)) } if(!is_GRP(g)) g <- GRP.default(g, return.groups = use.g.names, call = FALSE) if(use.g.names) return(`dimnames<-`(.Call(C_fminm,x,g[[1L]],g[[2L]],na.rm,FALSE), list(GRPnames(g), dimnames(x)[[2L]]))) return(.Call(C_fminm,x,g[[1L]],g[[2L]],na.rm,FALSE)) } if(is.null(g)) return(.Call(Cpp_TRAm,x,.Call(C_fminm,x,0L,0L,na.rm,TRUE),0L,TtI(TRA))) g <- G_guo(g) .Call(Cpp_TRAm,x,.Call(C_fminm,x,g[[1L]],g[[2L]],na.rm,FALSE),g[[2L]],TtI(TRA)) } fmin.data.frame <- function(x, g = NULL, TRA = NULL, na.rm = TRUE, use.g.names = TRUE, drop = TRUE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(TRA)) { if(is.null(g)) return(.Call(C_fminl,x,0L,0L,na.rm,drop)) if(is.atomic(g)) { if(use.g.names && !inherits(x, "data.table")) { if(!is.nmfactor(g)) g <- qF(g, na.exclude = FALSE) lev <- attr(g, "levels") return(setRnDF(.Call(C_fminl,x,length(lev),g,na.rm,FALSE), lev)) } if(is.nmfactor(g)) return(.Call(C_fminl,x,fnlevels(g),g,na.rm,FALSE)) g <- qG(g, na.exclude = FALSE) return(.Call(C_fminl,x,attr(g,"N.groups"),g,na.rm,FALSE)) } if(!is_GRP(g)) g <- GRP.default(g, return.groups = use.g.names, call = FALSE) if(use.g.names && !inherits(x, "data.table") && length(groups <- GRPnames(g))) return(setRnDF(.Call(C_fminl,x,g[[1L]],g[[2L]],na.rm,FALSE), groups)) return(.Call(C_fminl,x,g[[1L]],g[[2L]],na.rm,FALSE)) } if(is.null(g)) return(.Call(Cpp_TRAl,x,.Call(C_fminl,x,0L,0L,na.rm,TRUE),0L,TtI(TRA))) g <- G_guo(g) .Call(Cpp_TRAl,x,.Call(C_fminl,x,g[[1L]],g[[2L]],na.rm,FALSE),g[[2L]],TtI(TRA)) } fmin.list <- function(x, g = NULL, TRA = NULL, na.rm = TRUE, use.g.names = TRUE, drop = TRUE, ...) fmin.data.frame(x, g, TRA, na.rm, use.g.names, drop, ...) fmin.grouped_df <- function(x, TRA = NULL, na.rm = TRUE, use.g.names = FALSE, keep.group_vars = TRUE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) g <- GRP.grouped_df(x, call = FALSE) nam <- attr(x, "names") gn <- which(nam %in% g[[5L]]) nTRAl <- is.null(TRA) gl <- length(gn) > 0L if(gl || nTRAl) { ax <- attributes(x) attributes(x) <- NULL if(nTRAl) { ax[["groups"]] <- NULL ax[["class"]] <- fsetdiff(ax[["class"]], c("GRP_df", "grouped_df")) ax[["row.names"]] <- if(use.g.names) GRPnames(g) else .set_row_names(g[[1L]]) if(gl) { if(keep.group_vars) { ax[["names"]] <- c(g[[5L]], nam[-gn]) return(setAttributes(c(g[[4L]],.Call(C_fminl,x[-gn],g[[1L]],g[[2L]],na.rm,FALSE)), ax)) } ax[["names"]] <- nam[-gn] return(setAttributes(.Call(C_fminl,x[-gn],g[[1L]],g[[2L]],na.rm,FALSE), ax)) } else if(keep.group_vars) { ax[["names"]] <- c(g[[5L]], nam) return(setAttributes(c(g[[4L]],.Call(C_fminl,x,g[[1L]],g[[2L]],na.rm,FALSE)), ax)) } else return(setAttributes(.Call(C_fminl,x,g[[1L]],g[[2L]],na.rm,FALSE), ax)) } else if(keep.group_vars) { ax[["names"]] <- c(nam[gn], nam[-gn]) return(setAttributes(c(x[gn],.Call(Cpp_TRAl,x[-gn],.Call(C_fminl,x[-gn],g[[1L]],g[[2L]],na.rm,FALSE),g[[2L]],TtI(TRA))), ax)) } ax[["names"]] <- nam[-gn] return(setAttributes(.Call(Cpp_TRAl,x[-gn],.Call(C_fminl,x[-gn],g[[1L]],g[[2L]],na.rm,FALSE),g[[2L]],TtI(TRA)), ax)) } else return(.Call(Cpp_TRAl,x,.Call(C_fminl,x,g[[1L]],g[[2L]],na.rm,FALSE),g[[2L]],TtI(TRA))) } fmax <- function(x, ...) UseMethod("fmax") # , x fmax.default <- function(x, g = NULL, TRA = NULL, na.rm = TRUE, use.g.names = TRUE, ...) { if(is.matrix(x) && !inherits(x, "matrix")) return(fmax.matrix(x, g, TRA, na.rm, use.g.names, ...)) if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(TRA)) { if(is.null(g)) return(.Call(C_fmax,x,0L,0L,na.rm)) if(is.atomic(g)) { if(use.g.names) { if(!is.nmfactor(g)) g <- qF(g, na.exclude = FALSE) lev <- attr(g, "levels") return(`names<-`(.Call(C_fmax,x,length(lev),g,na.rm), lev)) } if(is.nmfactor(g)) return(.Call(C_fmax,x,fnlevels(g),g,na.rm)) g <- qG(g, na.exclude = FALSE) return(.Call(C_fmax,x,attr(g,"N.groups"),g,na.rm)) } if(!is_GRP(g)) g <- GRP.default(g, return.groups = use.g.names, call = FALSE) if(use.g.names) return(`names<-`(.Call(C_fmax,x,g[[1L]],g[[2L]],na.rm), GRPnames(g))) return(.Call(C_fmax,x,g[[1L]],g[[2L]],na.rm)) } if(is.null(g)) return(.Call(Cpp_TRA,x,.Call(C_fmax,x,0L,0L,na.rm),0L,TtI(TRA))) g <- G_guo(g) .Call(Cpp_TRA,x,.Call(C_fmax,x,g[[1L]],g[[2L]],na.rm),g[[2L]],TtI(TRA)) } fmax.matrix <- function(x, g = NULL, TRA = NULL, na.rm = TRUE, use.g.names = TRUE, drop = TRUE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(TRA)) { if(is.null(g)) return(.Call(C_fmaxm,x,0L,0L,na.rm,drop)) if(is.atomic(g)) { if(use.g.names) { if(!is.nmfactor(g)) g <- qF(g, na.exclude = FALSE) lev <- attr(g, "levels") return(`dimnames<-`(.Call(C_fmaxm,x,length(lev),g,na.rm,FALSE), list(lev, dimnames(x)[[2L]]))) } if(is.nmfactor(g)) return(.Call(C_fmaxm,x,fnlevels(g),g,na.rm,FALSE)) g <- qG(g, na.exclude = FALSE) return(.Call(C_fmaxm,x,attr(g,"N.groups"),g,na.rm,FALSE)) } if(!is_GRP(g)) g <- GRP.default(g, return.groups = use.g.names, call = FALSE) if(use.g.names) return(`dimnames<-`(.Call(C_fmaxm,x,g[[1L]],g[[2L]],na.rm,FALSE), list(GRPnames(g), dimnames(x)[[2L]]))) return(.Call(C_fmaxm,x,g[[1L]],g[[2L]],na.rm,FALSE)) } if(is.null(g)) return(.Call(Cpp_TRAm,x,.Call(C_fmaxm,x,0L,0L,na.rm,TRUE),0L,TtI(TRA))) g <- G_guo(g) .Call(Cpp_TRAm,x,.Call(C_fmaxm,x,g[[1L]],g[[2L]],na.rm,FALSE),g[[2L]],TtI(TRA)) } fmax.data.frame <- function(x, g = NULL, TRA = NULL, na.rm = TRUE, use.g.names = TRUE, drop = TRUE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(TRA)) { if(is.null(g)) return(.Call(C_fmaxl,x,0L,0L,na.rm,drop)) if(is.atomic(g)) { if(use.g.names && !inherits(x, "data.table")) { if(!is.nmfactor(g)) g <- qF(g, na.exclude = FALSE) lev <- attr(g, "levels") return(setRnDF(.Call(C_fmaxl,x,length(lev),g,na.rm,FALSE), lev)) } if(is.nmfactor(g)) return(.Call(C_fmaxl,x,fnlevels(g),g,na.rm,FALSE)) g <- qG(g, na.exclude = FALSE) return(.Call(C_fmaxl,x,attr(g,"N.groups"),g,na.rm,FALSE)) } if(!is_GRP(g)) g <- GRP.default(g, return.groups = use.g.names, call = FALSE) if(use.g.names && !inherits(x, "data.table") && length(groups <- GRPnames(g))) return(setRnDF(.Call(C_fmaxl,x,g[[1L]],g[[2L]],na.rm,FALSE), groups)) return(.Call(C_fmaxl,x,g[[1L]],g[[2L]],na.rm,FALSE)) } if(is.null(g)) return(.Call(Cpp_TRAl,x,.Call(C_fmaxl,x,0L,0L,na.rm,TRUE),0L,TtI(TRA))) g <- G_guo(g) .Call(Cpp_TRAl,x,.Call(C_fmaxl,x,g[[1L]],g[[2L]],na.rm,FALSE),g[[2L]],TtI(TRA)) } fmax.list <- function(x, g = NULL, TRA = NULL, na.rm = TRUE, use.g.names = TRUE, drop = TRUE, ...) fmax.data.frame(x, g, TRA, na.rm, use.g.names, drop, ...) fmax.grouped_df <- function(x, TRA = NULL, na.rm = TRUE, use.g.names = FALSE, keep.group_vars = TRUE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) g <- GRP.grouped_df(x, call = FALSE) nam <- attr(x, "names") gn <- which(nam %in% g[[5L]]) nTRAl <- is.null(TRA) gl <- length(gn) > 0L if(gl || nTRAl) { ax <- attributes(x) attributes(x) <- NULL if(nTRAl) { ax[["groups"]] <- NULL ax[["class"]] <- fsetdiff(ax[["class"]], c("GRP_df", "grouped_df")) ax[["row.names"]] <- if(use.g.names) GRPnames(g) else .set_row_names(g[[1L]]) if(gl) { if(keep.group_vars) { ax[["names"]] <- c(g[[5L]], nam[-gn]) return(setAttributes(c(g[[4L]],.Call(C_fmaxl,x[-gn],g[[1L]],g[[2L]],na.rm,FALSE)), ax)) } ax[["names"]] <- nam[-gn] return(setAttributes(.Call(C_fmaxl,x[-gn],g[[1L]],g[[2L]],na.rm,FALSE), ax)) } else if(keep.group_vars) { ax[["names"]] <- c(g[[5L]], nam) return(setAttributes(c(g[[4L]],.Call(C_fmaxl,x,g[[1L]],g[[2L]],na.rm,FALSE)), ax)) } else return(setAttributes(.Call(C_fmaxl,x,g[[1L]],g[[2L]],na.rm,FALSE), ax)) } else if(keep.group_vars) { ax[["names"]] <- c(nam[gn], nam[-gn]) return(setAttributes(c(x[gn],.Call(Cpp_TRAl,x[-gn],.Call(C_fmaxl,x[-gn],g[[1L]],g[[2L]],na.rm,FALSE),g[[2L]],TtI(TRA))), ax)) } ax[["names"]] <- nam[-gn] return(setAttributes(.Call(Cpp_TRAl,x[-gn],.Call(C_fmaxl,x[-gn],g[[1L]],g[[2L]],na.rm,FALSE),g[[2L]],TtI(TRA)), ax)) } else return(.Call(Cpp_TRAl,x,.Call(C_fmaxl,x,g[[1L]],g[[2L]],na.rm,FALSE),g[[2L]],TtI(TRA))) } collapse/R/dapply.R0000644000176200001440000001613514063465116013705 0ustar liggesusers dapply <- function(X, FUN, ..., MARGIN = 2, parallel = FALSE, mc.cores = 1L, return = c("same", "matrix", "data.frame"), drop = TRUE) { rowwl <- switch(MARGIN, `1` = TRUE, `2` = FALSE, stop("MARGIN only supports 2 - columns or 1 - rows")) aplyfun <- if(parallel) function(...) mclapply(..., mc.cores = mc.cores) else lapply if(is.atomic(X)) { dX <- dim(X) if(length(dX) != 2L) stop("dapply cannot handle vectors or higher-dimensional arrays") res <- if(rowwl) aplyfun(.Call(Cpp_mrtl, X, FALSE, 0L), FUN, ...) else aplyfun(.Call(Cpp_mctl, X, FALSE, 0L), FUN, ...) lx1 <- length(res[[1L]]) if(lx1 == 1L && drop) return(`names<-`(unlist(res, use.names = FALSE), dimnames(X)[[if(rowwl) 1L else 2L]])) switch(return[1L], same = { ax <- attributes(X) retmatl <- TRUE }, matrix = { ax <- list(dim = dX, dimnames = dimnames(X)) retmatl <- TRUE }, data.frame = { dn <- dimnames(X) ax <- list(names = dn[[2L]], row.names = if(is.null(dn[[1L]])) .set_row_names(dX[1L]) else dn[[1L]], class = "data.frame") retmatl <- FALSE }, stop("Unknown return option!")) } else { ax <- attributes(X) attributes(X) <- NULL res <- if(rowwl) aplyfun(.Call(Cpp_mrtl, do.call(cbind, X), FALSE, 0L), FUN, ...) else aplyfun(X, FUN, ...) lx1 <- length(res[[1L]]) if(lx1 == 1L && drop) return(`names<-`(unlist(res, use.names = FALSE), if(rowwl) charorNULL(ax[["row.names"]]) else ax[["names"]])) dX <- c(length(X[[1L]]), length(X)) switch(return[1L], same = retmatl <- FALSE, matrix = { ax <- list(dim = dX, dimnames = list(charorNULL(ax[["row.names"]]), ax[["names"]])) retmatl <- TRUE }, data.frame = { ax <- list(names = ax[["names"]], row.names = if(is.null(ax[["row.names"]])) .set_row_names(dX[1L]) else ax[["row.names"]], class = "data.frame") retmatl <- FALSE }, stop("Unknown return option!")) } if(retmatl) { if(rowwl) { if(lx1 != dX[2L]) { ax[["dim"]][2L] <- lx1 ax[["dimnames"]] <- list(ax[["dimnames"]][[1L]], if(length(nx1 <- names(res[[1L]]))) nx1 else if(lx1 == 1L) deparse(substitute(FUN)) else paste0(deparse(substitute(FUN)), seq_len(lx1))) } res <- matrix(unlist(res, use.names = FALSE), ncol = lx1, byrow = TRUE) } else { if(lx1 != dX[1L]) { ax[["dim"]][1L] <- lx1 ax[["dimnames"]] <- list(if(length(nx1 <- names(res[[1L]]))) nx1 else if(lx1 == 1L) deparse(substitute(FUN)) else paste0(deparse(substitute(FUN)), seq_len(lx1)), ax[["dimnames"]][[2L]]) } res <- do.call(cbind, res) } } else { if(rowwl) { if(lx1 != dX[2L]) ax[["names"]] <- if(length(nx1 <- names(res[[1L]]))) nx1 else if(lx1 == 1L) deparse(substitute(FUN)) else paste0(deparse(substitute(FUN)), seq_len(lx1)) res <- .Call(Cpp_mctl, matrix(unlist(res, use.names = FALSE), ncol = lx1, byrow = TRUE), FALSE, 0L) # definitely faster than do.call(rbind, X) } else if(lx1 != dX[1L]) ax[["row.names"]] <- if(length(nx1 <- names(res[[1L]]))) nx1 else .set_row_names(lx1) # could also make deparse(substitute(FUN)), but that is not so typical for data.frames ! if(any(ax[["class"]] == "data.table")) return(alcSA(res, ax)) } setAttributes(res, ax) } # Notes about this version: same as dapply 3 (compact), but takingdrop case before !! -> faster !! and also solving issue with row.names for matrices -> row and column names must be of same type !! as.matrix.data.frame converst row.names to character !! # Before v1.3.0 # dapply <- function(X, FUN, ..., MARGIN = 2, parallel = FALSE, # mc.cores = 1L, return = c("same","matrix","data.frame"), drop = TRUE) { # ax <- attributes(X) # arl <- is.atomic(X) # is.array, faster ! # rowwl <- switch(MARGIN, `1` = TRUE, `2` = FALSE, stop("MARGIN only supports 2 - columns or 1 - rows")) # retmatl <- switch(return[1L], same = arl, matrix = TRUE, data.frame = FALSE, stop("Unknown return option!")) # aplyfun <- if(parallel) function(...) mclapply(..., mc.cores = mc.cores) else lapply # if(arl) { # dX <- dim(X) # if(length(dX) != 2L) stop("dapply cannot handle vectors or higher-dimensional arrays") # res <- if(rowwl) aplyfun(.Call(Cpp_mrtl, X, FALSE, 0L), FUN, ...) else aplyfun(.Call(Cpp_mctl, X, FALSE, 0L), FUN, ...) # lx1 <- length(res[[1L]]) # if(lx1 == 1L && drop) return(`names<-`(unlist(res, use.names = FALSE), ax[["dimnames"]][[if(rowwl) 1L else 2L]])) # if(!retmatl) { # dn <- dimnames(X) # ax[["dimnames"]] # best ? -> use res instead of reassigning X ! -> no memory loss ! # ax <- list(names = dn[[2L]], row.names = if(is.null(dn[[1L]])) .set_row_names(dX[1L]) else dn[[1L]], # class = "data.frame") # c( ... , ax[!(names(ax) %in% c("dim","dimnames","class"))]) # don't know why one would need this ! # } # } else { # attributes(X) <- NULL # dX <- c(length(X[[1L]]), length(X)) # much faster than dim(X) on a list ! # res <- if(rowwl) aplyfun(.Call(Cpp_mrtl, do.call(cbind, X), FALSE, 0L), FUN, ...) else aplyfun(X, FUN, ...) # do.call(cbind, X) is definitely faster than unlist(X, use.names = FALSE) and attaching dim attribute # lx1 <- length(res[[1L]]) # if(lx1 == 1L && drop) return(`names<-`(unlist(res, use.names = FALSE), if(rowwl) charorNULL(ax[["row.names"]]) else ax[["names"]])) # if(retmatl) ax <- list(dim = dX, dimnames = list(charorNULL(ax[["row.names"]]), ax[["names"]])) # # c(..., ax[!(names(ax) %in% c("names","row.names","class"))]) # don't know why one would need this ! # } # if(retmatl) { # if(rowwl) { # if(lx1 != dX[2L]) { # ax[["dim"]][2L] <- lx1 # ax[["dimnames"]] <- list(ax[["dimnames"]][[1L]], if(length(nx1 <- names(res[[1L]]))) nx1 else if(lx1 == 1L) # deparse(substitute(FUN)) else paste0(deparse(substitute(FUN)), seq_len(lx1))) # } # res <- matrix(unlist(res, use.names = FALSE), ncol = lx1, byrow = TRUE) # } else { # if(lx1 != dX[1L]) { # ax[["dim"]][1L] <- lx1 # ax[["dimnames"]] <- list(if(length(nx1 <- names(res[[1L]]))) nx1 else if(lx1 == 1L) # deparse(substitute(FUN)) else paste0(deparse(substitute(FUN)), seq_len(lx1)), ax[["dimnames"]][[2L]]) # } # res <- do.call(cbind, res) # } # } else { # if(rowwl) { # if(lx1 != dX[2L]) ax[["names"]] <- if(length(nx1 <- names(res[[1L]]))) nx1 else if(lx1 == 1L) # deparse(substitute(FUN)) else paste0(deparse(substitute(FUN)), seq_len(lx1)) # res <- .Call(Cpp_mctl, matrix(unlist(res, use.names = FALSE), ncol = lx1, byrow = TRUE), FALSE, 0L) # definitely faster than do.call(rbind, X) # } else if(lx1 != dX[1L]) # ax[["row.names"]] <- if(length(nx1 <- names(res[[1L]]))) nx1 else .set_row_names(lx1) # could also make deparse(substitute(FUN)), but that is not so typical for data.frames ! # } # setAttributes(res, ax) # } collapse/R/fnth_fmedian.R0000644000176200001440000003272714172367040015041 0ustar liggesusers# Note: for foundational changes to this code see fsum.R fnth <- function(x, n = 0.5, ...) UseMethod("fnth") # , x fnth.default <- function(x, n = 0.5, g = NULL, w = NULL, TRA = NULL, na.rm = TRUE, use.g.names = TRUE, ties = "mean", ...) { if(is.matrix(x) && !inherits(x, "matrix")) return(fnth.matrix(x, n, g, w, TRA, na.rm, use.g.names, ties = ties, ...)) ret <- switch(ties, mean = 1L, min = 2L, max = 3L, stop("ties must be 'mean', 'min' or 'max'")) if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(TRA)) { if(is.null(g)) return(.Call(Cpp_fnth,x,n,0L,0L,NULL,w,na.rm,ret)) if(is.atomic(g)) { if(use.g.names) { if(!is.nmfactor(g)) g <- qF(g, na.exclude = FALSE) lev <- attr(g, "levels") return(`names<-`(.Call(Cpp_fnth,x,n,length(lev),g,NULL,w,na.rm,ret), lev)) } if(is.nmfactor(g)) return(.Call(Cpp_fnth,x,n,fnlevels(g),g,NULL,w,na.rm,ret)) g <- qG(g, na.exclude = FALSE) return(.Call(Cpp_fnth,x,n,attr(g,"N.groups"),g,NULL,w,na.rm,ret)) } if(!is_GRP(g)) g <- GRP.default(g, return.groups = use.g.names, call = FALSE) if(use.g.names) return(`names<-`(.Call(Cpp_fnth,x,n,g[[1L]],g[[2L]],g[[3L]],w,na.rm,ret), GRPnames(g))) return(.Call(Cpp_fnth,x,n,g[[1L]],g[[2L]],g[[3L]],w,na.rm,ret)) } if(is.null(g)) return(.Call(Cpp_TRA,x,.Call(Cpp_fnth,x,n,0L,0L,NULL,w,na.rm,ret),0L,TtI(TRA))) g <- G_guo(g) .Call(Cpp_TRA,x,.Call(Cpp_fnth,x,n,g[[1L]],g[[2L]],g[[3L]],w,na.rm,ret),g[[2L]],TtI(TRA)) } fnth.matrix <- function(x, n = 0.5, g = NULL, w = NULL, TRA = NULL, na.rm = TRUE, use.g.names = TRUE, drop = TRUE, ties = "mean", ...) { ret <- switch(ties, mean = 1L, min = 2L, max = 3L, stop("ties must be 'mean', 'min' or 'max'")) if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(TRA)) { if(is.null(g)) return(.Call(Cpp_fnthm,x,n,0L,0L,NULL,w,na.rm,drop,ret)) if(is.atomic(g)) { if(use.g.names) { if(!is.nmfactor(g)) g <- qF(g, na.exclude = FALSE) lev <- attr(g, "levels") return(`dimnames<-`(.Call(Cpp_fnthm,x,n,length(lev),g,NULL,w,na.rm,FALSE,ret), list(lev, dimnames(x)[[2L]]))) } if(is.nmfactor(g)) return(.Call(Cpp_fnthm,x,n,fnlevels(g),g,NULL,w,na.rm,FALSE,ret)) g <- qG(g, na.exclude = FALSE) return(.Call(Cpp_fnthm,x,n,attr(g,"N.groups"),g,NULL,w,na.rm,FALSE,ret)) } if(!is_GRP(g)) g <- GRP.default(g, return.groups = use.g.names, call = FALSE) if(use.g.names) return(`dimnames<-`(.Call(Cpp_fnthm,x,n,g[[1L]],g[[2L]],g[[3L]],w,na.rm,FALSE,ret), list(GRPnames(g), dimnames(x)[[2L]]))) return(.Call(Cpp_fnthm,x,n,g[[1L]],g[[2L]],g[[3L]],w,na.rm,FALSE,ret)) } if(is.null(g)) return(.Call(Cpp_TRAm,x,.Call(Cpp_fnthm,x,n,0L,0L,NULL,w,na.rm,TRUE,ret),0L,TtI(TRA))) g <- G_guo(g) .Call(Cpp_TRAm,x,.Call(Cpp_fnthm,x,n,g[[1L]],g[[2L]],g[[3L]],w,na.rm,FALSE,ret),g[[2L]],TtI(TRA)) } fnth.data.frame <- function(x, n = 0.5, g = NULL, w = NULL, TRA = NULL, na.rm = TRUE, use.g.names = TRUE, drop = TRUE, ties = "mean", ...) { ret <- switch(ties, mean = 1L, min = 2L, max = 3L, stop("ties must be 'mean', 'min' or 'max'")) if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(TRA)) { if(is.null(g)) return(.Call(Cpp_fnthl,x,n,0L,0L,NULL,w,na.rm,drop,ret)) if(is.atomic(g)) { if(use.g.names && !inherits(x, "data.table")) { if(!is.nmfactor(g)) g <- qF(g, na.exclude = FALSE) lev <- attr(g, "levels") return(setRnDF(.Call(Cpp_fnthl,x,n,length(lev),g,NULL,w,na.rm,FALSE,ret), lev)) } if(is.nmfactor(g)) return(.Call(Cpp_fnthl,x,n,fnlevels(g),g,NULL,w,na.rm,FALSE,ret)) g <- qG(g, na.exclude = FALSE) return(.Call(Cpp_fnthl,x,n,attr(g,"N.groups"),g,NULL,w,na.rm,FALSE,ret)) } if(!is_GRP(g)) g <- GRP.default(g, return.groups = use.g.names, call = FALSE) if(use.g.names && !inherits(x, "data.table") && length(groups <- GRPnames(g))) return(setRnDF(.Call(Cpp_fnthl,x,n,g[[1L]],g[[2L]],g[[3L]],w,na.rm,FALSE,ret), groups)) return(.Call(Cpp_fnthl,x,n,g[[1L]],g[[2L]],g[[3L]],w,na.rm,FALSE,ret)) } if(is.null(g)) return(.Call(Cpp_TRAl,x,.Call(Cpp_fnthl,x,n,0L,0L,NULL,w,na.rm,TRUE,ret),0L,TtI(TRA))) g <- G_guo(g) .Call(Cpp_TRAl,x,.Call(Cpp_fnthl,x,n,g[[1L]],g[[2L]],g[[3L]],w,na.rm,FALSE,ret),g[[2L]],TtI(TRA)) } fnth.list <- function(x, n = 0.5, g = NULL, w = NULL, TRA = NULL, na.rm = TRUE, use.g.names = TRUE, drop = TRUE, ties = "mean", ...) fnth.data.frame(x, n, g, w, TRA, na.rm, use.g.names, drop, ties, ...) fnth.grouped_df <- function(x, n = 0.5, w = NULL, TRA = NULL, na.rm = TRUE, use.g.names = FALSE, keep.group_vars = TRUE, keep.w = TRUE, ties = "mean", ...) { if(!missing(...)) unused_arg_action(match.call(), ...) ret <- switch(ties, mean = 1L, min = 2L, max = 3L, stop("ties must be 'mean', 'min' or 'max'")) g <- GRP.grouped_df(x, call = FALSE) wsym <- l1orn(as.character(substitute(w)), NULL) nam <- attr(x, "names") gn2 <- gn <- which(nam %in% g[[5L]]) nTRAl <- is.null(TRA) sumw <- NULL if(length(wsym) && length(wn <- whichv(nam, wsym))) { w <- .subset2(x, wn) # faster using unclass? if(any(gn == wn)) stop("Weights coincide with grouping variables!") gn <- c(gn, wn) if(keep.w) { if(nTRAl) sumw <- `names<-`(list(fsumC(w,g[[1L]],g[[2L]],NULL,na.rm)), paste0("sum.", wsym)) else if(keep.group_vars) gn2 <- gn else sumw <- gn2 <- wn } } gl <- length(gn) > 0L # necessary here, not before ! if(gl || nTRAl) { ax <- attributes(x) attributes(x) <- NULL if(nTRAl) { ax[["groups"]] <- NULL ax[["class"]] <- fsetdiff(ax[["class"]], c("GRP_df", "grouped_df")) ax[["row.names"]] <- if(use.g.names) GRPnames(g) else .set_row_names(g[[1L]]) if(gl) { if(keep.group_vars) { ax[["names"]] <- c(g[[5L]], names(sumw), nam[-gn]) return(setAttributes(c(g[[4L]], sumw, .Call(Cpp_fnthl,x[-gn],n,g[[1L]],g[[2L]],g[[3L]],w,na.rm,FALSE,ret)), ax)) } ax[["names"]] <- c(names(sumw), nam[-gn]) return(setAttributes(c(sumw, .Call(Cpp_fnthl,x[-gn],n,g[[1L]],g[[2L]],g[[3L]],w,na.rm,FALSE,ret)), ax)) } else if(keep.group_vars) { ax[["names"]] <- c(g[[5L]], nam) return(setAttributes(c(g[[4L]], .Call(Cpp_fnthl,x,n,g[[1L]],g[[2L]],g[[3L]],w,na.rm,FALSE,ret)), ax)) } else return(setAttributes(.Call(Cpp_fnthl,x,n,g[[1L]],g[[2L]],g[[3L]],w,na.rm,FALSE,ret), ax)) } else if(keep.group_vars || (keep.w && length(sumw))) { ax[["names"]] <- c(nam[gn2], nam[-gn]) return(setAttributes(c(x[gn2],.Call(Cpp_TRAl,x[-gn],.Call(Cpp_fnthl,x[-gn],n,g[[1L]],g[[2L]],g[[3L]],w,na.rm,FALSE,ret),g[[2L]],TtI(TRA))), ax)) } ax[["names"]] <- nam[-gn] return(setAttributes(.Call(Cpp_TRAl,x[-gn],.Call(Cpp_fnthl,x[-gn],n,g[[1L]],g[[2L]],g[[3L]],w,na.rm,FALSE,ret),g[[2L]],TtI(TRA)), ax)) } else return(.Call(Cpp_TRAl,x,.Call(Cpp_fnthl,x,n,g[[1L]],g[[2L]],g[[3L]],w,na.rm,FALSE,ret),g[[2L]],TtI(TRA))) } fmedian <- function(x, ...) UseMethod("fmedian") # , x fmedian.default <- function(x, g = NULL, w = NULL, TRA = NULL, na.rm = TRUE, use.g.names = TRUE, ...) { if(is.matrix(x) && !inherits(x, "matrix")) return(fmedian.matrix(x, g, w, TRA, na.rm, use.g.names, ...)) if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(TRA)) { if(is.null(g)) return(.Call(Cpp_fnth,x,0.5,0L,0L,NULL,w,na.rm,1L)) if(is.atomic(g)) { if(use.g.names) { if(!is.nmfactor(g)) g <- qF(g, na.exclude = FALSE) lev <- attr(g, "levels") return(`names<-`(.Call(Cpp_fnth,x,0.5,length(lev),g,NULL,w,na.rm,1L), lev)) } if(is.nmfactor(g)) return(.Call(Cpp_fnth,x,0.5,fnlevels(g),g,NULL,w,na.rm,1L)) g <- qG(g, na.exclude = FALSE) return(.Call(Cpp_fnth,x,0.5,attr(g,"N.groups"),g,NULL,w,na.rm,1L)) } if(!is_GRP(g)) g <- GRP.default(g, return.groups = use.g.names, call = FALSE) if(use.g.names) return(`names<-`(.Call(Cpp_fnth,x,0.5,g[[1L]],g[[2L]],g[[3L]],w,na.rm,1L), GRPnames(g))) return(.Call(Cpp_fnth,x,0.5,g[[1L]],g[[2L]],g[[3L]],w,na.rm,1L)) } if(is.null(g)) return(.Call(Cpp_TRA,x,.Call(Cpp_fnth,x,0.5,0L,0L,NULL,w,na.rm,1L),0L,TtI(TRA))) g <- G_guo(g) .Call(Cpp_TRA,x,.Call(Cpp_fnth,x,0.5,g[[1L]],g[[2L]],g[[3L]],w,na.rm,1L),g[[2L]],TtI(TRA)) } fmedian.matrix <- function(x, g = NULL, w = NULL, TRA = NULL, na.rm = TRUE, use.g.names = TRUE, drop = TRUE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(TRA)) { if(is.null(g)) return(.Call(Cpp_fnthm,x,0.5,0L,0L,NULL,w,na.rm,drop,1L)) if(is.atomic(g)) { if(use.g.names) { if(!is.nmfactor(g)) g <- qF(g, na.exclude = FALSE) lev <- attr(g, "levels") return(`dimnames<-`(.Call(Cpp_fnthm,x,0.5,length(lev),g,NULL,w,na.rm,FALSE,1L), list(lev, dimnames(x)[[2L]]))) } if(is.nmfactor(g)) return(.Call(Cpp_fnthm,x,0.5,fnlevels(g),g,NULL,w,na.rm,FALSE,1L)) g <- qG(g, na.exclude = FALSE) return(.Call(Cpp_fnthm,x,0.5,attr(g,"N.groups"),g,NULL,w,na.rm,FALSE,1L)) } if(!is_GRP(g)) g <- GRP.default(g, return.groups = use.g.names, call = FALSE) if(use.g.names) return(`dimnames<-`(.Call(Cpp_fnthm,x,0.5,g[[1L]],g[[2L]],g[[3L]],w,na.rm,FALSE,1L), list(GRPnames(g), dimnames(x)[[2L]]))) return(.Call(Cpp_fnthm,x,0.5,g[[1L]],g[[2L]],g[[3L]],w,na.rm,FALSE,1L)) } if(is.null(g)) return(.Call(Cpp_TRAm,x,.Call(Cpp_fnthm,x,0.5,0L,0L,NULL,w,na.rm,TRUE,1L),0L,TtI(TRA))) g <- G_guo(g) .Call(Cpp_TRAm,x,.Call(Cpp_fnthm,x,0.5,g[[1L]],g[[2L]],g[[3L]],w,na.rm,FALSE,1L),g[[2L]],TtI(TRA)) } fmedian.data.frame <- function(x, g = NULL, w = NULL, TRA = NULL, na.rm = TRUE, use.g.names = TRUE, drop = TRUE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(TRA)) { if(is.null(g)) return(.Call(Cpp_fnthl,x,0.5,0L,0L,NULL,w,na.rm,drop,1L)) if(is.atomic(g)) { if(use.g.names && !inherits(x, "data.table")) { if(!is.nmfactor(g)) g <- qF(g, na.exclude = FALSE) lev <- attr(g, "levels") return(setRnDF(.Call(Cpp_fnthl,x,0.5,length(lev),g,NULL,w,na.rm,FALSE,1L), lev)) } if(is.nmfactor(g)) return(.Call(Cpp_fnthl,x,0.5,fnlevels(g),g,NULL,w,na.rm,FALSE,1L)) g <- qG(g, na.exclude = FALSE) return(.Call(Cpp_fnthl,x,0.5,attr(g,"N.groups"),g,NULL,w,na.rm,FALSE,1L)) } if(!is_GRP(g)) g <- GRP.default(g, return.groups = use.g.names, call = FALSE) if(use.g.names && !inherits(x, "data.table") && length(groups <- GRPnames(g))) return(setRnDF(.Call(Cpp_fnthl,x,0.5,g[[1L]],g[[2L]],g[[3L]],w,na.rm,FALSE,1L), groups)) return(.Call(Cpp_fnthl,x,0.5,g[[1L]],g[[2L]],g[[3L]],w,na.rm,FALSE,1L)) } if(is.null(g)) return(.Call(Cpp_TRAl,x,.Call(Cpp_fnthl,x,0.5,0L,0L,NULL,w,na.rm,TRUE,1L),0L,TtI(TRA))) g <- G_guo(g) .Call(Cpp_TRAl,x,.Call(Cpp_fnthl,x,0.5,g[[1L]],g[[2L]],g[[3L]],w,na.rm,FALSE,1L),g[[2L]],TtI(TRA)) } fmedian.list <- function(x, g = NULL, w = NULL, TRA = NULL, na.rm = TRUE, use.g.names = TRUE, drop = TRUE, ...) fmedian.data.frame(x, g, w, TRA, na.rm, use.g.names, drop, ...) fmedian.grouped_df <- function(x, w = NULL, TRA = NULL, na.rm = TRUE, use.g.names = FALSE, keep.group_vars = TRUE, keep.w = TRUE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) g <- GRP.grouped_df(x, call = FALSE) wsym <- l1orn(as.character(substitute(w)), NULL) nam <- attr(x, "names") gn2 <- gn <- which(nam %in% g[[5L]]) nTRAl <- is.null(TRA) sumw <- NULL if(length(wsym) && length(wn <- whichv(nam, wsym))) { w <- .subset2(x, wn) # faster using unclass? if(any(gn == wn)) stop("Weights coincide with grouping variables!") gn <- c(gn, wn) if(keep.w) { if(nTRAl) sumw <- `names<-`(list(fsumC(w,g[[1L]],g[[2L]],NULL,na.rm)), paste0("sum.", wsym)) else if(keep.group_vars) gn2 <- gn else sumw <- gn2 <- wn } } gl <- length(gn) > 0L # necessary here, not before ! if(gl || nTRAl) { ax <- attributes(x) attributes(x) <- NULL if(nTRAl) { ax[["groups"]] <- NULL ax[["class"]] <- fsetdiff(ax[["class"]], c("GRP_df", "grouped_df")) ax[["row.names"]] <- if(use.g.names) GRPnames(g) else .set_row_names(g[[1L]]) if(gl) { if(keep.group_vars) { ax[["names"]] <- c(g[[5L]], names(sumw), nam[-gn]) return(setAttributes(c(g[[4L]], sumw, .Call(Cpp_fnthl,x[-gn],0.5,g[[1L]],g[[2L]],g[[3L]],w,na.rm,FALSE,1L)), ax)) } ax[["names"]] <- c(names(sumw), nam[-gn]) return(setAttributes(c(sumw, .Call(Cpp_fnthl,x[-gn],0.5,g[[1L]],g[[2L]],g[[3L]],w,na.rm,FALSE,1L)), ax)) } else if(keep.group_vars) { ax[["names"]] <- c(g[[5L]], nam) return(setAttributes(c(g[[4L]], .Call(Cpp_fnthl,x,0.5,g[[1L]],g[[2L]],g[[3L]],w,na.rm,FALSE,1L)), ax)) } else return(setAttributes(.Call(Cpp_fnthl,x,0.5,g[[1L]],g[[2L]],g[[3L]],w,na.rm,FALSE,1L), ax)) } else if(keep.group_vars || (keep.w && length(sumw))) { ax[["names"]] <- c(nam[gn2], nam[-gn]) return(setAttributes(c(x[gn2],.Call(Cpp_TRAl,x[-gn],.Call(Cpp_fnthl,x[-gn],0.5,g[[1L]],g[[2L]],g[[3L]],w,na.rm,FALSE,1L),g[[2L]],TtI(TRA))), ax)) } ax[["names"]] <- nam[-gn] return(setAttributes(.Call(Cpp_TRAl,x[-gn],.Call(Cpp_fnthl,x[-gn],0.5,g[[1L]],g[[2L]],g[[3L]],w,na.rm,FALSE,1L),g[[2L]],TtI(TRA)), ax)) } else return(.Call(Cpp_TRAl,x,.Call(Cpp_fnthl,x,0.5,g[[1L]],g[[2L]],g[[3L]],w,na.rm,FALSE,1L),g[[2L]],TtI(TRA))) } collapse/R/fcumsum.R0000644000176200001440000000603014174223734014065 0ustar liggesusersfcumsum <- function(x, ...) UseMethod("fcumsum") # , x fcumsum.default <- function(x, g = NULL, o = NULL, na.rm = TRUE, fill = FALSE, check.o = TRUE, ...) { if(is.matrix(x) && !inherits(x, "matrix")) return(UseMethod("fcumsum", unclass(x))) if(!missing(...)) unused_arg_action(match.call(), ...) if(length(o) && check.o) o <- ford(o, g) if(is.null(g)) return(.Call(C_fcumsum,x,0L,0L,o,na.rm,fill)) g <- G_guo(g) .Call(C_fcumsum,x,g[[1L]],g[[2L]],o,na.rm,fill) } fcumsum.pseries <- function(x, na.rm = TRUE, fill = FALSE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) index <- unclass(getpix(attr(x, "index"))) g <- if(length(index) > 2L) finteraction(index[-length(index)]) else index[[1L]] o <- ford(index[[length(index)]], g) if(is.matrix(x)) .Call(C_fcumsumm,x,fnlevels(g),g,o,na.rm,fill) else .Call(C_fcumsum,x,fnlevels(g),g,o,na.rm,fill) } fcumsum.matrix <- function(x, g = NULL, o = NULL, na.rm = TRUE, fill = FALSE, check.o = TRUE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) if(length(o) && check.o) o <- ford(o, g) if(is.null(g)) return(.Call(C_fcumsumm,x,0L,0L,o,na.rm,fill)) g <- G_guo(g) .Call(C_fcumsumm,x,g[[1L]],g[[2L]],o,na.rm,fill) } fcumsum.grouped_df <- function(x, o = NULL, na.rm = TRUE, fill = FALSE, check.o = TRUE, keep.ids = TRUE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) g <- GRP.grouped_df(x, call = FALSE) osym <- all.vars(substitute(o)) nam <- attr(x, "names") gn <- which(nam %in% g[[5L]]) if(length(osym) && !anyNA(on <- match(osym, nam))) { if(length(on) == 1L) { if(any(gn == on)) stop("ordervar coincides with grouping variables!") o <- .subset2(x, on) } else { if(any(gn %in% on)) stop("ordervar coincides with grouping variables!") o <- .subset(x, on) } if(check.o) o <- ford(o, g) gn <- c(gn, on) } if(length(gn)) { ax <- attributes(x) res <- .Call(C_fcumsuml,.subset(x,-gn),g[[1L]],g[[2L]],o,na.rm,fill) if(keep.ids) res <- c(.subset(x, gn), res) ax[["names"]] <- names(res) return(setAttributes(res, ax)) } .Call(C_fcumsuml,x,g[[1L]],g[[2L]],o,na.rm,fill) } fcumsum.data.frame <- function(x, g = NULL, o = NULL, na.rm = TRUE, fill = FALSE, check.o = TRUE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) if(length(o) && check.o) o <- ford(o, g) if(is.null(g)) return(.Call(C_fcumsuml,x,0L,0L,o,na.rm,fill)) g <- G_guo(g) .Call(C_fcumsuml,x,g[[1L]],g[[2L]],o,na.rm,fill) } fcumsum.list <- function(x, g = NULL, o = NULL, na.rm = TRUE, fill = FALSE, check.o = TRUE, ...) fcumsum.data.frame(x, g, o, na.rm, fill, check.o, ...) fcumsum.pdata.frame <- function(x, na.rm = TRUE, fill = FALSE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) index <- unclass(getpix(attr(x, "index"))) g <- if(length(index) > 2L) finteraction(index[-length(index)]) else index[[1L]] o <- ford(index[[length(index)]], g) .Call(C_fcumsuml,x,fnlevels(g),g,o,na.rm,fill) } collapse/NEWS.md0000644000176200001440000020307314201327077013162 0ustar liggesusers# collapse 1.7.6 * Corrected a C-level bug in `gsplit` that could lead R to crash in some instances (`gsplit` is used internally in `fsummarise`, `fmutate`, `BY` and `collap` to perform computations with base R (non-optimized) functions). * Ensured that `BY.grouped_df` always (by default) returns grouping columns in aggregations i.e. `iris |> gby(Species) |> nv() |> BY(sum)` now gives the same as `iris |> gby(Species) |> nv() |> fsum()`. * A `.` was added to the first argument of functions `fselect`, `fsubset`, `colorder` and `fgroup_by`, i.e. `fselect(x, ...) -> fselect(.x, ...)`. The reason for this is that over time I added the option to select-rename columns e.g. `fselect(mtcars, cylinders = cyl)`, which was not offered when these functions were created. This presents problems if columns should be renamed into `x`, e.g. `fselect(mtcars, x = cyl)` failed, see [#221](https://github.com/SebKrantz/collapse/issues/221). Renaming the first argument to `.x` somewhat guards against such situations. I think this change is worthwhile to implement, because it makes the package more robust going forward, and usually the first argument of these functions is never invoked explicitly. I really hope this breaks nobody's code. * Added a function `GRPN` to make it easy to add a column of group sizes e.g. `mtcars %>% fgroup_by(cyl,vs,am) %>% ftransform(Sizes = GRPN(.))` or `mtcars %>% ftransform(Sizes = GRPN(list(cyl, vs, am)))` or `GRPN(mtcars, by = ~cyl+vs+am)`. * Added `[.pwcor` and `[.pwcov`, to be able to subset correlation/covariance matrices without loosing the print formatting. # collapse 1.7.5 * Also ensuring tidyverse examples are in `\donttest{}` and building without the *dplyr* testing file to avoid issues with static code analysis on CRAN. * 20-50% Speed improvement in `gsplit` (and therefore in `fsummarise`, `fmutate`, `collap` and `BY` *when invoked with base R functions*) when grouping with `GRP(..., sort = TRUE, return.order = TRUE)`. To enable this by default, the default for argument `return.order` in `GRP` was set to `sort`, which retains the ordering vector (needed for the optimization). Retaining the ordering vector uses up some memory which can possibly adversely affect computations with big data, but with big data `sort = FALSE` usually gives faster results anyway, and you can also always set `return.order = FALSE` (also in `fgroup_by`, `collap`), so this default gives the best of both worlds. * An ancient depreciated argument `sort.row` (replaced by `sort` in 2020) is now removed from `collap`. Also arguments `return.order` and `method` were added to `collap` providing full control of the grouping that happens internally. # collapse 1.7.4 * Tests needed to be adjusted for the upcoming release of *dplyr* 1.0.8 which involves an API change in `mutate`. `fmutate` will not take over these changes i.e. `fmutate(..., .keep = "none")` will continue to work like `dplyr::transmute`. Furthermore, no more tests involving *dplyr* are run on CRAN, and I will also not follow along with any future *dplyr* API changes. * The C-API macro `installTrChar` (used in the new `massign` function) was replaced with `installChar` to maintain backwards compatibility with R versions prior to 3.6.0. Thanks @tedmoorman #213. * Minor improvements to `group()`, providing increased performance for doubles and also increased performance when the second grouping variable is integer, which turned out to be very slow in some instances. # collapse 1.7.3 * Removed tests involving the *weights* package (which is not available on R-devel CRAN checks). * `fgroup_by` is more flexible, supporting computing columns e.g. `fgroup_by(GGDC10S, Variable, Decade = floor(Year / 10) * 10)` and various programming options e.g. `fgroup_by(GGDC10S, 1:3)`, `fgroup_by(GGDC10S, c("Variable", "Country"))`, or `fgroup_by(GGDC10S, is.character)`. You can also use column sequences e.g. `fgroup_by(GGDC10S, Country:Variable, Year)`, but this should not be mixed with computing columns. Compute expressions may also not include the `:` function. * More memory efficient attribute handling in C/C++ (using C-API macro `SHALLOW_DUPLICATE_ATTRIB` instead of `DUPLICATE_ATTRIB`) in most places. # collapse 1.7.2 * Ensured that the base pipe `|>` is not used in tests or examples, to avoid errors on CRAN checks with older versions of R. * Also adjusted `psacf` / `pspacf` / `psccf` to take advantage of the faster grouping by `group`. # collapse 1.7.1 * Fixed minor C/C++ issues flagged in CRAN checks. * Added option `ties = "last"` to `fmode`. * Added argument `stable.algo` to `qsu`. Setting `stable.algo = FALSE` toggles a faster calculation of the standard deviation, yielding 2x speedup on large datasets. * *Fast Statistical Functions* now internally use `group` for grouping data if both `g` and `TRA` arguments are used, yielding efficiency gains on unsorted data. * Ensured that `fmutate` and `fsummarise` can be called if *collapse* is not attached. # collapse 1.7.0 *collapse* 1.7.0, released mid January 2022, brings major improvements in the computational backend of the package, it's data manipulation capabilities, and a whole set of new functions that enable more flexible and memory efficient R programming - significantly enhancing the language itself. For the vast majority of codes, updating to 1.7 should not cause any problems. ### Changes to functionality * `num_vars` is now implemented in C, yielding a massive performance increase over checking columns using `vapply(x, is.numeric, logical(1))`. It selects columns where `(is.double(x) || is.integer(x)) && !is.object(x)`. This provides the same results for most common classes found in data frames (e.g. factors and date columns are not numeric), however it is possible for users to define methods for `is.numeric` for other objects, which will not be respected by `num_vars` anymore. A prominent example are base R's 'ts' objects i.e. `is.numeric(AirPassengers)` returns `TRUE`, but `is.object(AirPassengers)` is also `TRUE` so the above yields `FALSE`, implying - if you happened to work with data frames of 'ts' columns - that `num_vars` will now not select those anymore. Please make me aware if there are other important classes that are found in data frames and where `is.numeric` returns `TRUE`. `num_vars` is also used internally in `collap` so this might affect your aggregations. * In `flag`, `fdiff` and `fgrowth`, if a plain numeric vector is passed to the `t` argument such that `is.double(t) && !is.object(t)`, it is coerced to integer using `as.integer(t)` and directly used as time variable, rather than applying ordered grouping first. This is to avoid the inefficiency of grouping, and owes to the fact that in most data imported into R with various packages, the time (year) variables are coded as double although they should be integer (I also don't know of any cases where time needs to be indexed by a non-date variable with decimal places). Note that the algorithm internally handles irregularity in the time variable so this is not a problem. Should this break any code, kindly raise an issue on GitHub. * The function `setrename` now truly renames objects by reference (without creating a shallow copy). The same is true for `vlabels<-` (which was rewritten in C) and a new function `setrelabel`. Thus additional care needs to be taken (with use inside functions etc.) as the renaming will take global effects unless a shallow copy of the data was created by some prior operation inside the function. If in doubt, better use `frename` which creates a shallow copy. * Some improvements to the `BY` function, both in terms of performance and security. Performance is enhanced through a new C function `gsplit`, providing split-apply-combine computing speeds competitive with *dplyr* on a much broader range of R objects. Regarding Security: if the result of the computation has the same length as the original data, names / rownames and grouping columns (for grouped data) are only added to the result object if known to be valid, i.e. if the data was originally sorted by the grouping columns (information recorded by `GRP.default(..., sort = TRUE)`, which is called internally on non-factor/GRP/qG objects). This is because `BY` does not reorder data after the split-apply-combine step (unlike `dplyr::mutate`); data are simply recombined in the order of the groups. Because of this, in general, `BY` should be used to compute summary statistics (unless data are sorted before grouping). The added security makes this explicit. * Added a method `length.GRP` giving the length of a grouping object. This could break code calling `length` on a grouping object before (which just returned the length of the list). * Functions renamed in collapse 1.6.0 will now print a message telling you to use the updated names. The functions under the old names will stay around for 1-3 more years. * The passing of argument `order` instead of `sort` in function `GRP` (from a very early version of collapse), is now disabled. ### Bug Fixes * Fixed a bug in some functions using Welfords Online Algorithm (`fvar`, `fsd`, `fscale` and `qsu`) to calculate variances, occurring when initial or final zero weights caused the running sum of weights in the algorithm to be zero, yielding a division by zero and `NA` as output although a value was expected. These functions now skip zero weights alongside missing weights, which also implies that you can pass a logical vector to the weights argument to very efficiently calculate statistics on a subset of data (e.g. using `qsu`). ### Additions #### Basic Computational Infrastructure * Function `group` was added, providing a low-level interface to a new unordered grouping algorithm based on hashing in C and optimized for R's data structures. The algorithm was heavily inspired by the great `kit` package of Morgan Jacob, and now feeds into the package through multiple central functions (including `GRP` / `fgroup_by`, `funique` and `qF`) when invoked with argument `sort = FALSE`. It is also used in internal groupings performed in data transformation functions such as `fwithin` (when no factor or 'GRP' object is provided to the `g` argument). The speed of the algorithm is very promising (often superior to `radixorder`), and it could be used in more places still. I welcome any feedback on it's performance on different datasets. * Function `gsplit` provides an efficient alternative to `split` based on grouping objects. It is used as a new backend to `rsplit` (which also supports data frame) as well as `BY`, `collap`, `fsummarise` and `fmutate` - for more efficient grouped operations with functions external to the package. * Added multiple functions to facilitate memory efficient programming (written in C). These include elementary mathematical operations by reference (`setop`, `%+=%`, `%-=%`, `%*=%`, `%/=%`), supporting computations involving integers and doubles on vectors, matrices and data frames (including row-wise operations via `setop`) with no copies at all. Furthermore a set of functions which check a single value against a vector without generating logical vectors: `whichv`, `whichNA` (operators `%==%` and `%!=%` which return indices and are significantly faster than `==`, especially inside functions like `fsubset`), `anyv` and `allv` (`allNA` was already added before). Finally, functions `setv` and `copyv` speed up operations involving the replacement of a value (`x[x == 5] <- 6`) or of a sequence of values from a equally sized object (`x[x == 5] <- y[x == 5]`, or `x[ind] <- y[ind]` where `ind` could be pre-computed vectors or indices) in vectors and data frames without generating any logical vectors or materializing vector subsets. * Function `vlengths` was added as a more efficient alternative to `lengths` (without method dispatch, simply coded in C). * Function `massign` provides a multivariate version of `assign` (written in C, and supporting all basic vector types). In addition the operator `%=%` was added as an efficient multiple assignment operator. (It is called `%=%` and not `%<-%` to facilitate the translation of Matlab or Python codes into R, and because the [zeallot]() package already provides multiple-assignment operators (`%<-%` and `%->%`), which are significantly more versatile, but orders of magnitude slower than `%=%`) #### High-Level Features * Fully fledged `fmutate` function that provides functionality analogous to `dplyr::mutate` (sequential evaluation of arguments, including arbitrary tagged expressions and `across` statements). `fmutate` is optimized to work together with the packages *Fast Statistical and Data Transformation Functions*, yielding fast, vectorized execution, but also benefits from `gsplit` for other operations. * `across()` function implemented for use inside `fsummarise` and `fmutate`. It is also optimized for *Fast Statistical and Data Transformation Functions*, but performs well with other functions too. It has an additional arguments `.apply = FALSE` which will apply functions to the entire subset of the data instead of individual columns, and thus allows for nesting tibbles and estimating models or correlation matrices by groups etc.. `across()` also supports an arbitrary number of additional arguments which are split and evaluated by groups if necessary. Multiple `across()` statements can be combined with tagged vector expressions in a single call to `fsummarise` or `fmutate`. Thus the computational framework is pretty general and similar to *data.table*, although less efficient with big datasets. * Added functions `relabel` and `setrelabel` to make interactive dealing with variable labels a bit easier. Note that both functions operate by reference. (Through `vlabels<-` which is implemented in C. Taking a shallow copy of the data frame is useless in this case because variable labels are attributes of the columns, not of the frame). The only difference between the two is that `setrelabel` returns the result invisibly. * function shortcuts `rnm` and `mtt` added for `frename` and `fmutate`. `across` can also be abbreviated using `acr`. * Added two options that can be invoked before loading of the package to change the namespace: `options(collapse_mask = c(...))` can be set to export copies of selected (or all) functions in the package that start with `f` removing the leading `f` e.g. `fsubset` -> `subset` (both `fsubset` and `subset` will be exported). This allows masking base R and dplyr functions (even basic functions such as `sum`, `mean`, `unique` etc. if desired) with *collapse*'s fast functions, facilitating the optimization of existing codes and allowing you to work with *collapse* using a more natural namespace. The package has been internally insulated against such changes, but of course they might have major effects on existing codes. Also `options(collapse_F_to_FALSE = FALSE)` can be invoked to get rid of the lead operator `F`, which masks `base::F` (an issue raised by some people who like to use `T`/`F` instead of `TRUE`/`FALSE`). Read the help page `?collapse-options` for more information. ### Improvements * Package loads faster (because I don't fetch functions from some other C/C++ heavy packages in `.onLoad` anymore, which implied unnecessary loading of a lot of DLLs). * `fsummarise` is now also fully featured supporting evaluation of arbitrary expressions and `across()` statements. Note that mixing *Fast Statistical Functions* with other functions in a single expression can yield unintended outcomes, read more at `?fsummarise`. * `funique` benefits from `group` in the default `sort = FALSE`, configuration, providing extra speed and unique values in first-appearance order in both the default and the data frame method, for all data types. * Function `ss` supports both empty `i` or `j`. * The printout of `fgroup_by` also shows minimum and maximum group size for unbalanced groupings. * In `ftransformv/settransformv` and `fcomputev`, the `vars` argument is also evaluated inside the data frame environment, allowing NSE specifications using column names e.g. `ftransformv(data, c(col1, col2:coln), FUN)`. * `qF` with option `sort = FALSE` now generates factors with levels in first-appearance order (instead of a random order assigned by the hash function), and can also be called on an existing factor to recast the levels in first-appearance order. It is also faster with `sort = FALSE` (thanks to `group`). * `finteraction` has argument `sort = FALSE` to also take advantage of `group`. * `rsplit` has improved performance through `gsplit`, and an additional argument `use.names`, which can be used to return an unnamed list. * Speedup in `vtypes` and functions `num_vars`, `cat_vars`, `char_vars`, `logi_vars` and `fact_vars`. Note than `num_vars` behaves slightly differently as discussed above. * `vlabels(<-)` / `setLabels` rewritten in C, giving a ~20x speed improvement. Note that they now operate by reference. * `vlabels`, `vclasses` and `vtypes` have a `use.names` argument. The default is `TRUE` (as before). * `colorder` can rename columns on the fly and also has a new mode `pos = "after"` to place all selected columns after the first selected one, e.g.: `colorder(mtcars, cyl, vs_new = vs, am, pos = "after")`. The `pos = "after"` option was also added to `roworderv`. + `add_stub` and `rm_stub` have an additional `cols` argument to apply a stub to certain columns only e.g. `add_stub(mtcars, "new_", cols = 6:9)`. * `namlab` has additional arguments `N` and `Ndistinct`, allowing to display number of observations and distinct values next to variable names, labels and classes, to get a nice and quick overview of the variables in a large dataset. * `copyMostAttrib` only copies the `"row.names"` attribute when known to be valid. * `na_rm` can now be used to efficiently remove empty or `NULL` elements from a list. * `flag`, `fdiff` and `fgrowth` produce less messages (i.e. no message if you don't use a time variable in grouped operations, and messages about computations on highly irregular panel data only if data length exceeds 10 million obs.). * The print methods of `pwcor` and `pwcov` now have a `return` argument, allowing users to obtain the formatted correlation matrix, for exporting purposes. * `replace_NA`, `recode_num` and `recode_char` have improved performance and an additional argument `set` to take advantage of `setv` to change (some) data by reference. For `replace_NA`, this feature is mature and setting `set = TRUE` will modify all selected columns in place and return the data invisibly. For `recode_num` and `recode_char` only a part of the transformations are done by reference, thus users will still have to assign the data to preserve changes. In the future, this will be improved so that `set = TRUE` toggles all transformations to be done by reference. # collapse 1.6.5 * Use of `VECTOR_PTR` in C API now gives an error on R-devel even if `USE_RINTERNALS` is defined. Thus this patch gets rid of all remaining usage of this macro to avoid errors on CRAN checks using the development version of R. * The print method for `qsu` now uses an apostrophe (') to designate million digits, instead of a comma (,). This is to avoid confusion with the decimal point, and the typical use of (,) for thousands (which I don't like). # collapse 1.6.4 Checks on the gcc11 compiler flagged an additional issue with a pointer pointing to element -1 of a C array (which I had done on purpose to index it with an R integer vector). # collapse 1.6.3 CRAN checks flagged a valgrind issue because of comparing an uninitialized value to something. # collapse 1.6.2 CRAN maintainers have asked me to remove a line in a Makevars file intended to reduce the size of Rcpp object files (which has been there since version 1.4). So the installed size of the package may now be larger. # collapse 1.6.1 A patch for 1.6.0 which fixes issues flagged by CRAN and adds a few handy extras. ### Bug Fixes * Puts examples using the new base pipe `|>` inside `\donttest{}` so that they don't fail CRAN tests on older R versions. * Fixes a LTO issue caused by a small mistake in a header file (which does not have any implications to the user but was detected by CRAN checks). ### Additions * Added a function `fcomputev`, which allows selecting columns and transforming them with a function in one go. The `keep` argument can be used to add columns to the selection that are not transformed. * Added a function `setLabels` as a wrapper around `vlabels<-` to facilitate setting variable labels inside pipes. * Function `rm_stub` now has an argument `regex = TRUE` which triggers a call to `gsub` and allows general removing of character sequences in column names on the fly. ### Improvements * `vlabels<-` and `setLabels` now support list of variable labels or other attributes (i.e. the `value` is internally subset using `[[`, not `[`). Thus they are now general functions to attach a vector or list of attributes to columns in a list / data frame. # collapse 1.6.0 *collapse* 1.6.0, released end of June 2021, presents some significant improvements in the user-friendliness, compatibility and programmability of the package, as well as a few function additions. ### Changes to Functionality * `ffirst`, `flast`, `fnobs`, `fsum`, `fmin` and `fmax` were rewritten in C. The former three now also support list columns (where `NULL` or empty list elements are considered missing values when `na.rm = TRUE`), and are extremely fast for grouped aggregation if `na.rm = FALSE`. The latter three also support and return integers, with significant performance gains, even compared to base R. Code using these functions expecting an error for list-columns or expecting double output even if the input is integer should be adjusted. * *collapse* now directly supports *sf* data frames through functions like `fselect`, `fsubset`, `num_vars`, `qsu`, `descr`, `varying`, `funique`, `roworder`, `rsplit`, `fcompute` etc., which will take along the geometry column even if it is not explicitly selected (mirroring *dplyr* methods for *sf* data frames). This is mostly done internally at C-level, so functions remain simple and fast. Existing code that explicitly selects the geometry column is unaffected by the change, but code of the form `sf_data %>% num_vars %>% qDF %>% ...`, where columns excluding geometry were selected and the object later converted to a data frame, needs to be rewritten as `sf_data %>% qDF %>% num_vars %>% ...`. A short vignette was added describing the integration of *collapse* and *sf*. * I've received several requests for increased namespace consistency. *collapse* functions were named to be consistent with base R, *dplyr* and *data.table*, resulting in names like `is.Date`, `fgroup_by` or `settransformv`. To me this makes sense, but I've been convinced that a bit more consistency is advantageous. Towards that end I have decided to eliminate the '.' notation of base R and to remove some unexpected capitalizations in function names giving some people the impression I was using camel-case. The following functions are renamed: `fNobs` -> `fnobs`, `fNdistinct` -> `fndistinct`, `pwNobs` -> `pwnobs`, `fHDwithin` -> `fhdwithin`, `fHDbetween` -> `fhdbetween`, `as.factor_GRP` -> `as_factor_GRP`, `as.factor_qG` -> `as_factor_qG`, `is.GRP` -> `is_GRP`, `is.qG` -> `is_qG`, `is.unlistable` -> `is_unlistable`, `is.categorical` -> `is_categorical`, `is.Date` -> `is_date`, `as.numeric_factor` -> `as_numeric_factor`, `as.character_factor` -> `as_character_factor`, `Date_vars` -> `date_vars`. This is done in a very careful manner, the others will stick around for a long while (end of 2022), and the generics of `fNobs`, `fNdistinct`, `fHDbetween` and `fHDwithin` will be kept in the package for an indeterminate period, but their core methods will not be exported beyond 2022. I will start warning about these renamed functions in 2022. In the future I will undogmatically stick to a function naming style with lowercase function names and underslashes where words need to be split. Other function names will be kept. To say something about this: The quick-conversion functions `qDF` `qDT`, `qM`, `qF`, `qG` are consistent and in-line with *data.table* (`setDT` etc.), and similarly the operators `L`, `F`, `D`, `Dlog`, `G`, `B`, `W`, `HDB`, `HDW`. I'll keep `GRP`, `BY` and `TRA`, for lack of better names, parsimony and because they are central to the package. The camel case will be kept in helper functions `setDimnames` etc. because they work like *stats* `setNames` and do not modify the argument by reference (like `settransform` or `setrename` and various *data.table* functions). Functions `copyAttrib` and `copyMostAttrib` are exports of like-named functions in the C API and thus kept as they are. Finally, I want to keep `fFtest` the way it is because the F-distribution is widely recognized by a capital F. * I've updated the `wlddev` dataset with the latest data from the World Bank, and also added a variable giving the total population (which may be useful e.g. for population-weighted aggregations across regions). The extra column could invalidate codes used to demonstrate something (I had to adjust some examples, tests and code in vignettes). ### Additions * Added a function `fcumsum` (written in C), permitting flexible (grouped, ordered) cumulative summations on matrix-like objects (integer or double typed) with extra methods for grouped data frames and panel series and data frames. Apart from the internal grouping, and an ordering argument allowing cumulative sums in a different order than data appear, `fcumsum` has 2 options to deal with missing values. The default (`na.rm = TRUE`) is to skip (preserve) missing values, whereas setting `fill = TRUE` allows missing values to be populated with the previous value of the cumulative sum (starting from 0). * Added a function `alloc` to efficiently generate vectors initialized with any value (faster than `rep_len`). * Added a function `pad` to efficiently pad vectors / matrices / data.frames with a value (default is `NA`). This function was mainly created to make it easy to expand results coming from a statistical model fitted on data with missing values to the original length. For example let `data <- na_insert(mtcars); mod <- lm(mpg ~ cyl, data)`, then we can do `settransform(data, resid = pad(resid(mod), mod$na.action))`, or we could do `pad(model.matrix(mod), mod$na.action)` or `pad(model.frame(mod), mod$na.action)` to receive matrices and data frames from model data matching the rows of `data`. `pad` is a general function that will also work with mixed-type data. It is also possible to pass a vector of indices matching the rows of the data to `pad`, in which case `pad` will fill gaps in those indices with a value/row in the data. ### Improvements * Full *data.table* support, including reference semantics (`set*`, `:=`)!! There is some complex C-level programming behind *data.table*'s operations by reference. Notably, additional (hidden) column pointers are allocated to be able to add columns without taking a shallow copy of the *data.table*, and an `".internal.selfref"` attribute containing an external pointer is used to check if any shallow copy was made using base R commands like `<-`. This is done to avoid even a shallow copy of the *data.table* in manipulations using `:=` (and is in my opinion not worth it as even large tables are shallow copied by base R (>=3.1.0) within microseconds and all of this complicates development immensely). Previously, *collapse* treated *data.table*'s like any other data frame, using shallow copies in manipulations and preserving the attributes (thus ignoring how *data.table* works internally). This produced a warning whenever you wanted to use *data.table* reference semantics (`set*`, `:=`) after passing the *data.table* through a *collapse* function such as `collap`, `fselect`, `fsubset`, `fgroup_by` etc. From v1.6.0, I have adopted essential C code from *data.table* to do the overallocation and generate the `".internal.selfref"` attribute, thus seamless workflows combining *collapse* and *data.table* are now possible. This comes at a cost of about 2-3 microseconds per function, as to do this I have to shallow copy the *data.table* again and add extra column pointers and an `".internal.selfref"` attribute telling *data.table* that this table was not copied (it seems to be the only way to do it for now). This integration encompasses all data manipulation functions in *collapse*, but not the *Fast Statistical Functions* themselves. Thus you can do `agDT <- DT %>% fselect(id, col1:coln) %>% collap(~id, fsum); agDT[, newcol := 1]`, but you would need to do add a `qDT` after a function like `fsum` if you want to use reference semantics without incurring a warning: `agDT <- DT %>% fselect(id, col1:coln) %>% fgroup_by(id) %>% fsum %>% qDT; agDT[, newcol := 1]`. *collapse* appears to be the first package that attempts to account for *data.table*'s internal working without importing *data.table*, and `qDT` is now the fastest way to create a fully functional *data.table* from any R object. A global option `"collapse_DT_alloccol"` was added to regulate how many columns *collapse* overallocates when creating *data.table*'s. The default is 100, which is lower than the *data.table* default of 1024. This was done to increase efficiency of the additional shallow copies, and may be changed by the user. * Programming enabled with `fselect` and `fgroup_by` (you can now pass vectors containing column names or indices). Note that instead of `fselect` you should use `get_vars` for standard eval programming. * `fselect` and `fsubset` support in-place renaming, e.g. `fselect(data, newname = var1, var3:varN)`, `fsubset(data, vark > varp, newname = var1, var3:varN)`. * `collap` supports renaming columns in the custom argument, e.g. `collap(data, ~ id, custom = list(fmean = c(newname = "var1", "var2"), fmode = c(newname = 3), flast = is_date))`. * Performance improvements: `fsubset` / `ss` return the data or perform a simple column subset without deep copying the data if all rows are selected through a logical expression. `fselect` and `get_vars`, `num_vars` etc. are slightly faster through data frame subsetting done fully in C. `ftransform` / `fcompute` use `alloc` instead of `base::rep` to replicate a scalar value which is slightly more efficient. * `fcompute` now has a `keep` argument, to preserve several existing columns when computing columns on a data frame. * `replace_NA` now has a `cols` argument, so we can do `replace_NA(data, cols = is.numeric)`, to replace `NA`'s in numeric columns. I note that for big numeric data `data.table::setnafill` is the most efficient solution. * `fhdbetween` and `fhdwithin` have an `effect` argument in *plm* methods, allowing centering on selected identifiers. The default is still to center on all panel identifiers. * The plot method for panel series matrices and arrays `plot.psmat` was improved slightly. It now supports custom colours and drawing of a grid. * `settransform` and `settransformv` can now be called without attaching the package e.g. `collapse::settransform(data, ...)`. These errored before when *collapse* is not loaded because they are simply wrappers around `data <- ftransform(data, ...)`. I'd like to note from a [discussion](https://github.com/SebKrantz/collapse/issues/136) that avoiding shallow copies with `<-` (e.g. via `:=`) does not appear to yield noticeable performance gains. Where *data.table* is faster on big data this mostly has to do with parallelism and sometimes with algorithms, generally not memory efficiency. * Functions `setAttrib`, `copyAttrib` and `copyMostAttrib` only make a shallow copy of lists, not of atomic vectors (which amounts to doing a full copy and is inefficient). Thus atomic objects are now modified in-place. * Small improvements: Calling `qF(x, ordered = FALSE)` on an ordered factor will remove the ordered class, the operators `L`, `F`, `D`, `Dlog`, `G`, `B`, `W`, `HDB`, `HDW` and functions like `pwcor` now work on unnamed matrices or data frames. # collapse 1.5.3 * A test that occasionally fails on Mac is removed, and all unit testing is now removed from CRAN. *collapse* has close to 10,000 unit tests covering all central pieces of code. Half of these tests depend on generated data, and for some reasons there is always a test or two that occasionally fail on some operating system (usually not Windows), requiring me to submit a patch. This is not constructive to either the development or the use of this package, therefore tests are now removed from CRAN. They are still run on codecov.io, and every new release is thoroughly tested on Windows. # collapse 1.5.2 ### Changes to Functionality * The first argument of `ftransform` was renamed to `.data` from `X`. This was done to enable the user to transform columns named "X". For the same reason the first argument of `frename` was renamed to `.x` from `x` (not `.data` to make it explicit that `.x` can be any R object with a "names" attribute). It is not possible to depreciate `X` and `x` without at the same time undoing the benefits of the argument renaming, thus this change is immediate and code breaking in rare cases where the first argument is explicitly set. * The function `is.regular` to check whether an R object is atomic or list-like is depreciated and will be removed before the end of the year. This was done to avoid a namespace clash with the *zoo* package (#127). ### Bug Fixes * `unlist2d` produced a subsetting error if an empty list was present in the list-tree. This is now fixed, empty or `NULL` elements in the list-tree are simply ignored (#99). ### Additions * A function `fsummarize` was added to facilitate translating *dplyr* / *data.table* code to *collapse*. Like `collap`, it is only very fast when used with the *Fast Statistical Functions*. * A function `t_list` is made available to efficiently transpose lists of lists. ### Improvements * C files are compiled -O3 on Windows, which gives a boost of around 20% for the grouping mechanism applied to character data. # collapse 1.5.1 A small patch for 1.5.0 that: * Fixes a numeric precision issue when grouping doubles (e.g. before `qF(wlddev$LIFEEX)` gave an error, now it works). * Fixes a minor issue with `fhdwithin` when applied to *pseries* and `fill = FALSE`. # collapse 1.5.0 *collapse* 1.5.0, released early January 2021, presents important refinements and some additional functionality. ### Back to CRAN * I apologize for inconveniences caused by the temporal archival of *collapse* from December 19, 2020. This archival was caused by the archival of the important *lfe* package on the 4th of December. *collapse* depended on *lfe* for higher-dimensional centering, providing the `fhdbetween / fhdwithin` functions for generalized linear projecting / partialling out. To remedy the damage caused by the removal of *lfe*, I had to rewrite `fhdbetween / fhdwithin` to take advantage of the demeaning algorithm provided by *fixest*, which has some quite different mechanics. Beforehand, I made some significant changes to `fixest::demean` itself to make this integration happen. The CRAN deadline was the 18th of December, and I realized too late that I would not make this. A request to CRAN for extension was declined, so *collapse* got archived on the 19th. I have learned from this experience, and *collapse* is now sufficiently insulated that it will not be taken off CRAN even if all suggested packages were removed from CRAN. ### Bug Fixes * Segfaults in several *Fast Statistical Functions* when passed `numeric(0)` are fixed (thanks to @eshom and @acylam, [#101](https://github.com/SebKrantz/collapse/issues/101)). The default behavior is that all *collapse* functions return `numeric(0)` again, except for `fnobs`, `fndistinct` which return `0L`, and `fvar`, `fsd` which return `NA_real_`. ### Changes to Functionality * Functions `fhdwithin / HDW` and `fhdbetween / HDB` have been reworked, delivering higher performance and greater functionality: For higher-dimensional centering and heterogeneous slopes, the `demean` function from the *fixest* package is imported (conditional on the availability of that package). The linear prediction and partialling out functionality is now built around `flm` and also allows for weights and different fitting methods. * In `collap`, the default behavior of `give.names = "auto"` was altered when used together with the `custom` argument. Before the function name was always added to the column names. Now it is only added if a column is aggregated with two different functions. I apologize if this breaks any code dependent on the new names, but this behavior just better reflects most common use (applying only one function per column), as well as STATA's collapse. * For list processing functions like `get_elem`, `has_elem` etc. the default for the argument `DF.as.list` was changed from `TRUE` to `FALSE`. This means if a nested lists contains data frame's, these data frame's will not be searched for matching elements. This default also reflects the more common usage of these functions (extracting entire data frame's or computed quantities from nested lists rather than searching / subsetting lists of data frame's). The change also delivers a considerable performance gain. * Vignettes were outsourced to the [website](). This nearly halves the size of the source package, and should induce users to appreciate the built-in documentation. The website also makes for much more convenient reading and navigation of these book-style vignettes. ### Additions * Added a set of 10 operators `%rr%`, `%r+%`, `%r-%`, `%r*%`, `%r/%`, `%cr%`, `%c+%`, `%c-%`, `%c*%`, `%c/%` to facilitate and speed up row- and column-wise arithmetic operations involving a vector and a matrix / data frame / list. For example `X %r*% v` efficiently multiplies every row of `X` with `v`. Note that more advanced functionality is already provided in `TRA()`, `dapply()` and the *Fast Statistical Functions*, but these operators are intuitive and very convenient to use in matrix or matrix-style code, or in piped expressions. * Added function `missing_cases` (opposite of `complete.cases` and faster for data frame's / lists). * Added function `allNA` for atomic vectors. * New vignette about using *collapse* together with *data.table*, available [online](). ### Improvements * Time series functions and operators `flag / L / F`, `fdiff / D / Dlog` and `fgrowth / G` now natively support irregular time series and panels, and feature a 'complete approach' i.e. values are shifted around taking full account of the underlying time-dimension! * Functions `pwcor` and `pwcov` can now compute weighted correlations on the pairwise or complete observations, supported by C-code that is (conditionally) imported from the *weights* package. * `fFtest` now also supports weights. * `collap` now provides an easy workaround to aggregate some columns using weights and others without. The user may simply append the names of *Fast Statistical Functions* with `_uw` to disable weights. Example: `collapse::collap(mtcars, ~ cyl, custom = list(fmean_uw = 3:4, fmean = 8:10), w = ~ wt)` aggregates columns 3 through 4 using a simple mean and columns 8 through 10 using the weighted mean. * The parallelism in `collap` using `parallel::mclapply` has been reworked to operate at the column-level, and not at the function level as before. It is still not available for Windows though. The default number of cores was set to `mc.cores = 2L`, which now gives an error on windows if `parallel = TRUE`. * function `recode_char` now has additional options `ignore.case` and `fixed` (passed to `grepl`), for enhanced recoding character data based on regular expressions. * `rapply2d` now has `classes` argument permitting more flexible use. * `na_rm` and some other internal functions were rewritten in C. `na_rm` is now 2x faster than `x[!is.na(x)]` with missing values and 10x faster without missing values. # collapse 1.4.2 * An improvement to the `[.GRP_df` method enabling the use of most *data.table* methods (such as `:=`) on a grouped *data.table* created with `fgroup_by`. * Some documentation updates by Kevin Tappe. # collapse 1.4.1 collapse 1.4.1 is a small patch for 1.4.0 that: * fixes clang-UBSAN and rchk issues in 1.4.0 (minor bugs in compiled code resulting, in this case, from trying to coerce a `NaN` value to integer, and failing to protect a shallow copy of a variable). * Adds a method `[.GRP_df` that allows robust subsetting of grouped objects created with `fgroup_by` (thanks to Patrice Kiener for flagging this). # collapse 1.4.0 *collapse* 1.4.0, released early November 2020, presents some important refinements, particularly in the domain of attribute handling, as well as some additional functionality. The changes make *collapse* smarter, more broadly compatible and more secure, and should not break existing code. ### Changes to Functionality * *Deep Matrix Dispatch / Extended Time Series Support:* The default methods of all statistical and transformation functions dispatch to the matrix method if `is.matrix(x) && !inherits(x, "matrix")` evaluates to `TRUE`. This specification avoids invoking the default method on classed matrix-based objects (such as multivariate time series of the *xts* / *zoo* class) not inheriting a 'matrix' class, while still allowing the user to manually call the default method on matrices (objects with implicit or explicit 'matrix' class). The change implies that *collapse*'s generic statistical functions are now well suited to transform *xts* / *zoo* and many other time series and matrix-based classes. * *Fully Non-Destructive Piped Workflow:* `fgroup_by(x, ...)` now only adds a class *grouped_df*, not classes *table_df*, *tbl*, *grouped_df*, and preserves all classes of `x`. This implies that workflows such as `x %>% fgroup_by(...) %>% fmean` etc. yields an object `xAG` of the same class and attributes as `x`, not a tibble as before. *collapse* aims to be as broadly compatible, class-agnostic and attribute preserving as possible. * *Thorough and Controlled Object Conversions:* Quick conversion functions `qDF`, `qDT` and `qM` now have additional arguments `keep.attr` and `class` providing precise user control over object conversions in terms of classes and other attributes assigned / maintained. The default (`keep.attr = FALSE`) yields *hard* conversions removing all but essential attributes from the object. E.g. before `qM(EuStockMarkets)` would just have returned `EuStockMarkets` (because `is.matrix(EuStockMarkets)` is `TRUE`) whereas now the time series class and 'tsp' attribute are removed. `qM(EuStockMarkets, keep.attr = TRUE)` returns `EuStockMarkets` as before. * *Smarter Attribute Handling:* Drawing on the guidance given in the R Internals manual, the following standards for optimal non-destructive attribute handling are formalized and communicated to the user: + The default and matrix methods of the *Fast Statistical Functions* preserve attributes of the input in grouped aggregations ('names', 'dim' and 'dimnames' are suitably modified). If inputs are classed objects (e.g. factors, time series, checked by `is.object`), the class and other attributes are dropped. Simple (non-grouped) aggregations of vectors and matrices do not preserve attributes, unless `drop = FALSE` in the matrix method. An exemption is made in the default methods of functions `ffirst`, `flast` and `fmode`, which always preserve the attributes (as the input could well be a factor or date variable). + The data frame methods are unaltered: All attributes of the data frame and columns in the data frame are preserved unless the computation result from each column is a scalar (not computing by groups) and `drop = TRUE` (the default). + Transformations with functions like `flag`, `fwithin`, `fscale` etc. are also unaltered: All attributes of the input are preserved in the output (regardless of whether the input is a vector, matrix, data.frame or related classed object). The same holds for transformation options modifying the input ("-", "-+", "/", "+", "\*", "%%", "-%%") when using `TRA()` function or the `TRA = "..."` argument to the *Fast Statistical Functions*. + For `TRA` 'replace' and 'replace_fill' options, the data type of the STATS is preserved, not of x. This provides better results particularly with functions like `fnobs` and `fndistinct`. E.g. previously `fnobs(letters, TRA = "replace")` would have returned the observation counts coerced to character, because `letters` is character. Now the result is integer typed. For attribute handling this means that the attributes of x are preserved unless x is a classed object and the data types of x and STATS do not match. An exemption to this rule is made if x is a factor and an integer (non-factor) replacement is offered to STATS. In that case the attributes of x are copied exempting the 'class' and 'levels' attribute, e.g. so that `fnobs(iris$Species, TRA = "replace")` gives an integer vector, not a (malformed) factor. In the unlikely event that STATS is a classed object, the attributes of STATS are preserved and the attributes of x discarded. * *Reduced Dependency Burden:* The dependency on the *lfe* package was made optional. Functions `fhdwithin` / `fhdbetween` can only perform higher-dimensional centering if *lfe* is available. Linear prediction and centering with a single factor (among a list of covariates) is still possible without installing *lfe*. This change means that *collapse* now only depends on base R and *Rcpp* and is supported down to R version 2.10. ### Additions * Added function `rsplit` for efficient (recursive) splitting of vectors and data frames. * Added function `fdroplevels` for very fast missing level removal + added argument `drop` to `qF` and `GRP.factor`, the default is `drop = FALSE`. The addition of `fdroplevels` also enhances the speed of the `fFtest` function. * `fgrowth` supports annualizing / compounding growth rates through added `power` argument. * A function `flm` was added for bare bones (weighted) linear regression fitting using different efficient methods: 4 from base R (`.lm.fit`, `solve`, `qr`, `chol`), using `fastLm` from *RcppArmadillo* (if installed), or `fastLm` from *RcppEigen* (if installed). * Added function `qTBL` to quickly convert R objects to tibble. * helpers `setAttrib`, `copyAttrib` and `copyMostAttrib` exported for fast attribute handling in R (similar to `attributes<-()`, these functions return a shallow copy of the first argument with the set of attributes replaced, but do not perform checks for attribute validity like `attributes<-()`. This can yield large performance gains with big objects). * helper `cinv` added wrapping the expression `chol2inv(chol(x))` (efficient inverse of a symmetric, positive definite matrix via Choleski factorization). * A shortcut `gby` is now available to abbreviate the frequently used `fgroup_by` function. * A print method for grouped data frames of any class was added. ### Improvements * Faster internal methods for factors for `funique`, `fmode` and `fndistinct`. * The *grouped_df* methods for `flag`, `fdiff`, `fgrowth` now also support multiple time variables to identify a panel e.g. `data %>% fgroup_by(region, person_id) %>% flag(1:2, list(month, day))`. * More security features for `fsubset.data.frame` / `ss`, `ss` is now internal generic and also supports subsetting matrices. * In some functions (like `na_omit`), passing double values (e.g. `1` instead of integer `1L`) or negative indices to the `cols` argument produced an error or unexpected behavior. This is now fixed in all functions. * Fixed a bug in helper function `all_obj_equal` occurring if objects are not all equal. * Some performance improvements through increased use of pointers and C API functions. # collapse 1.3.2 collapse 1.3.2, released mid September 2020: * Fixed a small bug in `fndistinct` for grouped distinct value counts on logical vectors. * Additional security for `ftransform`, which now efficiently checks the names of the data and replacement arguments for uniqueness, and also allows computing and transforming list-columns. * Added function `ftransformv` to facilitate transforming selected columns with function - a very efficient replacement for `dplyr::mutate_if` and `dplyr::mutate_at`. * `frename` now allows additional arguments to be passed to a renaming function. # collapse 1.3.1 collapse 1.3.1, released end of August 2020, is a patch for v1.3.0 that takes care of some unit test failures on certain operating systems (mostly because of numeric precision issues). It provides no changes to the code or functionality. # collapse 1.3.0 collapse 1.3.0, released mid August 2020: ### Changes to Functionality * `dapply` and `BY` now drop all unnecessary attributes if `return = "matrix"` or `return = "data.frame"` are explicitly requested (the default `return = "same"` still seeks to preserve the input data structure). * `unlist2d` now saves integer rownames if `row.names = TRUE` and a list of matrices without rownames is passed, and `id.factor = TRUE` generates a normal factor not an ordered factor. It is however possible to write `id.factor = "ordered"` to get an ordered factor id. * `fdiff` argument `logdiff` renamed to `log`, and taking logs is now done in R (reduces size of C++ code and does not generate as many NaN's). `logdiff` may still be used, but it may be deactivated in the future. Also in the matrix and data.frame methods for `flag`, `fdiff` and `fgrowth`, columns are only stub-renamed if more than one lag/difference/growth rate is computed. ### Additions * Added `fnth` for fast (grouped, weighted) n'th element/quantile computations. * Added `roworder(v)` and `colorder(v)` for fast row and column reordering. * Added `frename` and `setrename` for fast and flexible renaming (by reference). * Added function `fungroup`, as replacement for `dplyr::ungroup`, intended for use with `fgroup_by`. * `fmedian` now supports weights, computing a decently fast (grouped) weighted median based on radix ordering. * `fmode` now has the option to compute min and max mode, the default is still simply the first mode. * `fwithin` now supports quasi-demeaning (added argument `theta`) and can thus be used to manually estimate random-effects models. * `funique` is now generic with a default vector and data.frame method, providing fast unique values and rows of data. The default was changed to `sort = FALSE`. * The shortcut `gvr` was created for `get_vars(..., regex = TRUE)`. * A helper `.c` was introduced for non-standard concatenation (i.e. `.c(a, b) == c("a", "b")`). ### Improvements * `fmode` and `fndistinct` have become a bit faster. * `fgroup_by` now preserves *data.table*'s. * `ftransform` now also supports a data.frame as replacement argument, which automatically replaces matching columns and adds unmatched ones. Also `ftransform<-` was created as a more formal replacement method for this feature. * `collap` columns selected through `cols` argument are returned in the order selected if `keep.col.order = FALSE`. Argument `sort.row` is depreciated, and replace by argument `sort`. In addition the `decreasing` and `na.last` arguments were added and handed down to `GRP.default`. * `radixorder` 'sorted' attribute is now always attached. * `stats::D` which is masked when collapse is attached, is now preserved through methods `D.expression` and `D.call`. * `GRP` option `call = FALSE` to omit a call to `match.call` -> minor performance improvement. * Several small performance improvements through rewriting some internal helper functions in C and reworking some R code. * Performance improvements for some helper functions, `setRownames` / `setColnames`, `na_insert` etc. * Increased scope of testing statistical functions. The functionality of the package is now secured by 7700 unit tests covering all central bits and pieces. # collapse 1.2.1 collapse 1.2.1, released end of May 2020: * Minor fixes for 1.2.0 issues that prevented correct installation on Mac OS X and a vignette rebuilding error on solaris. * `fmode.grouped_df` with groups and weights now saves the sum of the weights instead of the max (this makes more sense as the max only applies if all elements are unique). # collapse 1.2.0 collapse 1.2.0, released mid May 2020: ### Changes to Functionality * *grouped_df* methods for fast statistical functions now always attach the grouping variables to the output in aggregations, unless argument `keep.group_vars = FALSE`. (formerly grouping variables were only attached if also present in the data. Code hinged on this feature should be adjusted) * `qF` `ordered` argument default was changed to `ordered = FALSE`, and the `NA` level is only added if `na.exclude = FALSE`. Thus `qF` now behaves exactly like `as.factor`. * `Recode` is depreciated in favor of `recode_num` and `recode_char`, it will be removed soon. Similarly `replace_non_finite` was renamed to `replace_Inf`. * In `mrtl` and `mctl` the argument `ret` was renamed `return` and now takes descriptive character arguments (the previous version was a direct C++ export and unsafe, code written with these functions should be adjusted). * `GRP` argument `order` is depreciated in favor of argument `decreasing`. `order` can still be used but will be removed at some point. ### Bug Fixes * Fixed a bug in `flag` where unused factor levels caused a group size error. ### Additions * Added a suite of functions for fast data manipulation: + `fselect` selects variables from a data frame and is equivalent but much faster than `dplyr::select`. + `fsubset` is a much faster version of `base::subset` to subset vectors, matrices and data.frames. The function `ss` was also added as a faster alternative to `[.data.frame`. + `ftransform` is a much faster update of `base::transform`, to transform data frames by adding, modifying or deleting columns. The function `settransform` does all of that by reference. + `fcompute` is equivalent to `ftransform` but returns a new data frame containing only the columns computed from an existing one. + `na_omit` is a much faster and enhanced version of `base::na.omit`. + `replace_NA` efficiently replaces missing values in multi-type data. * Added function `fgroup_by` as a much faster version of `dplyr::group_by` based on *collapse* grouping. It attaches a 'GRP' object to a data frame, but only works with *collapse*'s fast functions. This allows *dplyr* like manipulations that are fully *collapse* based and thus significantly faster, i.e. `data %>% fgroup_by(g1,g2) %>% fselect(cola,colb) %>% fmean`. Note that `data %>% dplyr::group_by(g1,g2) %>% dplyr::select(cola,colb) %>% fmean` still works, in which case the *dplyr* 'group' object is converted to 'GRP' as before. However `data %>% fgroup_by(g1,g2) %>% dplyr::summarize(...)` does not work. * Added function `varying` to efficiently check the variation of multi-type data over a dimension or within groups. * Added function `radixorder`, same as `base::order(..., method = "radix")` but more accessible and with built-in grouping features. * Added functions `seqid` and `groupid` for generalized run-length type id variable generation from grouping and time variables. `seqid` in particular strongly facilitates lagging / differencing irregularly spaced panels using `flag`, `fdiff` etc. * `fdiff` now supports quasi-differences i.e. $x_t - \rho x_{t-1}$ and quasi-log differences i.e. $log(x_t) - \rho log(x_{t-1})$. an arbitrary $\rho$ can be supplied. * Added a `Dlog` operator for faster access to log-differences. ### Improvements * Faster grouping with `GRP` and faster factor generation with added radix method + automatic dispatch between hash and radix method. `qF` is now ~ 5x faster than `as.factor` on character and around 30x faster on numeric data. Also `qG` was enhanced. * Further slight speed tweaks here and there. * `collap` now provides more control for weighted aggregations with additional arguments `w`, `keep.w` and `wFUN` to aggregate the weights as well. The defaults are `keep.w = TRUE` and `wFUN = fsum`. A specialty of `collap` remains that `keep.by` and `keep.w` also work for external objects passed, so code of the form `collap(data, by, FUN, catFUN, w = data$weights)` will now have an aggregated `weights` vector in the first column. * `qsu` now also allows weights to be passed in formula i.e. `qsu(data, by = ~ group, pid = ~ panelid, w = ~ weights)`. * `fgrowth` has a `scale` argument, the default is `scale = 100` which provides growth rates in percentage terms (as before), but this may now be changed. * All statistical and transformation functions now have a hidden list method, so they can be applied to unclassed list-objects as well. An error is however provided in grouped operations with unequal-length columns. # collapse 1.1.0 collapse 1.1.0 released early April 2020: * Fixed remaining gcc10, LTO and valgrind issues in C/C++ code, and added some more tests (there are now ~ 5300 tests ensuring that *collapse* statistical functions perform as expected). * Fixed the issue that supplying an unnamed list to `GRP()`, i.e. `GRP(list(v1, v2))` would give an error. Unnamed lists are now automatically named 'Group.1', 'Group.2', etc... * Fixed an issue where aggregating by a single id in `collap()` (i.e. `collap(data, ~ id1)`), the id would be coded as factor in the aggregated data.frame. All variables including id's now retain their class and attributes in the aggregated data. * Added weights (`w`) argument to `fsum` and `fprod`. * Added an argument `mean = 0` to `fwithin / W`. This allows simple and grouped centering on an arbitrary mean, `0` being the default. For grouped centering `mean = "overall.mean"` can be specified, which will center data on the overall mean of the data. The logical argument `add.global.mean = TRUE` used to toggle this in *collapse* 1.0.0 is therefore depreciated. * Added arguments `mean = 0` (the default) and `sd = 1` (the default) to `fscale / STD`. These arguments now allow to (group) scale and center data to an arbitrary mean and standard deviation. Setting `mean = FALSE` will just scale data while preserving the mean(s). Special options for grouped scaling are `mean = "overall.mean"` (same as `fwithin / W`), and `sd = "within.sd"`, which will scale the data such that the standard deviation of each group is equal to the within- standard deviation (= the standard deviation computed on the group-centered data). Thus group scaling a panel-dataset with `mean = "overall.mean"` and `sd = "within.sd"` harmonizes the data across all groups in terms of both mean and variance. The fast algorithm for variance calculation toggled with `stable.algo = FALSE` was removed from `fscale`. Welford's numerically stable algorithm used by default is fast enough for all practical purposes. The fast algorithm is still available for `fvar` and `fsd`. * Added the modulus (`%%`) and subtract modulus (`-%%`) operations to `TRA()`. * Added the function `finteraction`, for fast interactions, and `as_character_factor` to coerce a factor, or all factors in a list, to character (analogous to `as_numeric_factor`). Also exported the function `ckmatch`, for matching with error message showing non-matched elements. # collapse 1.0.0 and earlier * First version of the package featuring only the functions `collap` and `qsu` based on code shared by Sebastian Krantz on R-devel, February 2019. * Major rework of the package using Rcpp and data.table internals, introduction of fast statistical functions and operators and expansion of the scope of the package to a broad set of data transformation and exploration tasks. Several iterations of enhancing speed of R code. Seamless integration of *collapse* with *dplyr*, *plm* and *data.table*. CRAN release of *collapse* 1.0.0 on 19th March 2020. collapse/MD50000644000176200001440000002576314201453312012374 0ustar liggesusers545674fc1f181970e47f4adf798f62a9 *DESCRIPTION 3292c7ff0d3614cbcdebb9018d9bbc74 *LICENSE c1f9c65702c8d8d077cca9a91842e626 *NAMESPACE bd84cc902f3979f569e91f4950af0bad *NEWS.md d36aedc11812ebb6ca1f0e9353c98883 *R/BY.R 292543c8b5ab1fecf114e174ef5a12c3 *R/GRP.R 8af062aa51d7edcad72517bac23ee436 *R/RcppExports.R e776247a6df925cdbda69061ae3c3de7 *R/TRA.R 89a0d21b38d7813abc1edef3a63da07d *R/collap.R 48f00b81ac19076c6b038af163c8a41d *R/dapply.R 0935f8053a2c15972faffd124b222ecc *R/descr.R 3bee8dd6b1a21801e918b17126c90a2d *R/fFtest.R 2ca46ddc33078a076dbc46be024a6342 *R/fbetween_fwithin.R 489f05b1dd0d5365234b70068e4beb39 *R/fcumsum.R fcb0d955230eeb2480afd1f186b6535e *R/fdiff_fgrowth.R 392e0a1c412f1d323be90d5272490cd5 *R/ffirst.R 6bc8544c9065d963e22dd4aa44c5d1a1 *R/fhdbetween_fhdwithin.R 885269e88a11011fbd9042aa4393d910 *R/flag.R 5f4d5287f002e7bdabf9c4de42abb573 *R/flast.R 5fc9f073672d9df30461a10643755782 *R/flm.R 596130428e685589ff5a530a84762a12 *R/fmean.R e2bba6132b0f3ff178118564b5701239 *R/fmin_fmax.R 466095d1d96b7d5923bec5d01d4e2ebd *R/fmode.R 09b48861d46f59c7507734daaf765134 *R/fndistinct.R b383cdaa99ca01b519d56aa610a4a992 *R/fnobs.R c533c6a085b76397389e76607a284c5a *R/fnth_fmedian.R 0fcd4dc488e221272278283d3edc45c8 *R/fprod.R 815627b7c3d73904b479c88b1c802549 *R/fscale.R 0683ae24f938c5fbf12eaa2a6f47d524 *R/fsubset_ftransform.R 016f98143ae685883f93145f19a09c15 *R/fsum.R e3b59a3316f2790cfd0b7db498d3fedf *R/fsummarise.R 145076c967ef511a1477b85f6b2a7cd7 *R/fvar_fsd.R bd78f12aa008c2c9d0ba92bc1a472113 *R/global_macros.R 48e315eb3fc2d2e65b793415344ad264 *R/list_functions.R e0ae453e3d36eb4dbd2acebea4926d2b *R/my_RcppExports.R 7d8ddb0f1e91f60231a7dbe45636e945 *R/psacf.R 23874cb9f0b7f510385f8acde63f659f *R/psmat.R 035633cc3cc9e9cb6f2097deb1c3167c *R/pwcor_pwcov_pwnobs.R 2f86a1e1e7be6e613e17551d81491348 *R/qsu.R afdfb44969fed0682a312afaf726f6fb *R/quick_conversion.R f63377d0f845026a0e4bb17d847cd06a *R/recode_replace.R 60c0f20a447754c1f7a4fb984f9ec1f6 *R/roworder_colorder_rename.R 52b162e0416aa2d4883a097e2da8aa96 *R/rsplit.R 355894eeb344eaf0c813e526389ae80e *R/select_replace_add_vars.R d7673224bab38fcbdf22224e3a2bbfb9 *R/small_helper.R 2be6f1f41e6f112b80b148dfc3a58d83 *R/unlist2d.R a85e093b7ea92207f25eb299f5682041 *R/varying.R 909dc9a1de21e8b986e1fb267b5dbe0d *R/zzz.R e6928a0085c8fc94c2f92e4488acc037 *build/vignette.rds a5742d7c7117d4198a262b7079ef3b58 *data/GGDC10S.rda c0728676845d1671c4342e712cad7673 *data/wlddev.rda 0279de8758286cd2403ada8028a14d7c *inst/doc/collapse_documentation.Rmd 622648dcf2d90eb6a79049c09d067ae2 *inst/doc/collapse_documentation.html 64efd7ee33ab184c2a09989e5fc794fc *man/BY.Rd 4ff8ad3382426395fcd26eddc60190bf *man/GGDC10S.Rd 472b6fc6c8c9117bfa6e2c05b768507e *man/GRP.Rd 0cdb1c83be1abdaf8801ddaecb0dc206 *man/TRA.Rd 2d2c30b1bd55a04a3bc5fc44944813a6 *man/across.Rd 7ed45b82b5a3704310b859fdd929dc8b *man/arithmetic.Rd 39480ab2cec9b00d24fc4e546d5b6de7 *man/collap.Rd 0f8b04ef96ba1385980d3eacd32d4c4c *man/collapse-depreciated.Rd 4dda287ef37baacc7bd89e6e7cd6c387 *man/collapse-documentation.Rd 1a3fab65cde4b7531b4b4ed325048348 *man/collapse-options.Rd c83819d5b7269e32c98aee72211025e3 *man/collapse-package.Rd 15a1d011cee598e36f6205bf247a9107 *man/collapse-renamed.Rd 3bbd3005a10dba24c5711bdfb39df059 *man/colorder.Rd f7e392fe57f04caa37c0c5b1e9b1475f *man/dapply.Rd 4c8098a64063406f60deed54b9f957a4 *man/data-transformations.Rd 0cb926b62c40ee00381040c6e7b00088 *man/descr.Rd e6a0154e6b91dae6780f389859f24022 *man/extract_list.Rd 33f89b1a7f2d62a03d9914667b288b14 *man/fFtest.Rd 7edc9cc08296f0a6708f0a7c2405134e *man/fast-data-manipulation.Rd 89ec8b18f6356a1ea2c341f83fb0f355 *man/fast-grouping.Rd 4ec94a3d35e60586d7ddd4423ef9c4a5 *man/fast-statistical-functions.Rd e4afbea7d3ab0beee1768f77ecf95239 *man/fbetween_fwithin.Rd 94b26fc63bbe578fcb3c7139eda43975 *man/fcumsum.Rd f014f729abccbcbecac8063b321840e9 *man/fdiff.Rd 906d0ffdb5039168ac78d2891d116aa2 *man/fdroplevels.Rd 716524642167f211e090807fdd92701b *man/ffirst_flast.Rd bc27a68e95085985391d6a9908b2db99 *man/fgrowth.Rd 2db920d5c6da8e551b2aa1529802bdb8 *man/fhdbetween_fhdwithin.Rd ffcd3bfa8989039d66de8aeebd11052d *man/flag.Rd d9f63af5b3d1ef2a33a3f8474aa8f1df *man/flm.Rd 607aec5e9a6006d3549ae9eb1094689b *man/fmean.Rd 67c3aa3caf575a1ac2dd18bb372d8f11 *man/fmedian.Rd e2859bc8b9923f736180a79615dd5216 *man/fmin_fmax.Rd 4de0058b68307d8fb4465e60657a56e9 *man/fmode.Rd c1dcd59ed869ef9a1371eec83ef003f1 *man/fndistinct.Rd bd18f20653879763d31f8dd256867f31 *man/fnobs.Rd a49be1ed1b574c5c6236f90616d270f9 *man/fnth.Rd 2fcb7362a14d1e78165842a88f3920a6 *man/fprod.Rd bd0a27967a66dd6071bf4374642a6a5b *man/frename.Rd 48e772685ea6c09081818cc72f937d59 *man/fscale.Rd cb207f9a6abb34ba5520fb24bf4fb779 *man/fsubset.Rd 61f7d1c250c99eaa981499aae1acf7ec *man/fsum.Rd c6be6445145e14c8cc30ebcf677a2a7b *man/fsummarise.Rd 7666b3357d87b803f748207aa417565f *man/ftransform.Rd e83d0ae2b930052f615fa6b1eb60ba6b *man/funique.Rd f951be161500f87ab46cb1ced600b511 *man/fvar_fsd.Rd bb1c5dc039c2c1d72da19cb9c9e0ac0c *man/group.Rd f1d9c97c979db4b905624cd089aa372c *man/groupid.Rd b83b4916bad7acea870e8fbab6b231bb *man/is_unlistable.Rd cda587e1d75f9c95af5ae2b57c3b82b1 *man/ldepth.Rd 22dd8c4466f27037d3a4cd4b3273d86a *man/list-processing.Rd c43abb7e12a85ad40b263fc1a7672f50 *man/pad.Rd fa093165085bb6ca03f2c330a29aa0bf *man/programming.Rd b5db05ab250fc8143f5cdfebc9d8d9e7 *man/psacf.Rd 0f7959e0b539f2d064dcb7612feadcdb *man/psmat.Rd 81502cb62806a69f52b885c1060a1386 *man/pwcor_pwcov_pwnobs.Rd 80d62a9f4960fd3dfd67a12c085e71a1 *man/qF.Rd f2ad7a436c81ad0d26e01951decb11a6 *man/qsu.Rd 3987e339a738390f2dcb496746b61e13 *man/quick-conversion.Rd 45b6999f50f551addf9c5783afe23761 *man/radixorder.Rd 1d996abfc2df6e3846ea949b472ebd84 *man/rapply2d.Rd 3af76c247fc7ce381332b010e446d0f2 *man/recode-replace.Rd f161578ce4c2683166e75fa2bc3cd68a *man/roworder.Rd 0157aa18ffdba3668805ceab9c802d68 *man/rsplit.Rd e5c2a86498bb6010d64442b9e982efd1 *man/select_replace_vars.Rd 43653c92a9cb7f39bdad48bc6f5b6584 *man/seqid.Rd 306cffd82e0668ca4f1150294f513bbf *man/small-helper.Rd f49ef5f2d4760646ec0ceb6f6977470b *man/summary-statistics.Rd 4cc5475fdd2340f8c0f22afa51d30045 *man/t_list.Rd 09cc22875e7cdba5f748e64c63332db8 *man/time-series-panel-series.Rd 13c0cf5ea2b3ff45f6a5533b2c8d2155 *man/unlist2d.Rd 4fd4aefb2d08f6dca535229d6078b643 *man/varying.Rd 3d1726481c9c919aeee22bfe647d790f *man/wlddev.Rd 5cf5eb1dabcaaa68584cd969edded19a *src/ExportSymbols.cpp c5b0d43a49a0dd6711cfd011e7635a73 *src/Makevars.win 77c8e577da3e3906a147543492630c7c *src/RcppExports.cpp bfc94e0145905ec756a1808a2943a935 *src/TRA.cpp 37db889b407707d7664bc9f6d9d12905 *src/base_radixsort.c cd34e7507b9a9288d3b4859a43b9bfad *src/base_radixsort.h 021e26af3be95f6dd2e10b3d8e2bca74 *src/collapse.h cb1b9b8263b3c630e373b4a1fdeb4252 *src/collapse_c.h 232dd2189e17839748f57cbb4379b21a *src/data.table.h 7c6f733e3bf4a33d3ee224f362accfa4 *src/data.table_init.c 625ecf4644969fb3781d1fd090511b61 *src/data.table_rbindlist.c 866e3abecd83e9bbe2b42d882be3d217 *src/data.table_subset.c 6f79578e8cd9bb9b020d0811ac79b2b6 *src/data.table_utils.c d1708b3584a8f99e08d9d4308df06f98 *src/fNdistinct.cpp d1b6fd2e1440df1cd9e4d0826d78a168 *src/fbetween_fwithin.cpp 726e56536816aa7aea937bedabe3e648 *src/fbstats.cpp faf1580547ee02682e1ddc64aa8a509b *src/fcumsum.c 4e24bd66a563673f1ef1a269ffb2a027 *src/fdiff_fgrowth.cpp a541ea87b668e10ce50a16279bd02f1d *src/ffirst.c 436f1b66b38a47677d859c89a2c6fa6f *src/flag.cpp e2a046ea457cce899e856d13a4907e02 *src/flast.c 20ad91b4d530b2cfdc719b149967a03b *src/fmean.cpp f11a99335fdae646bbc561802792d885 *src/fmin_fmax.c cca2f3d3410c4dc1d02ad90c49f92d53 *src/fmode.cpp 33cc7df7b40afcc3b8f823c5c18c0ca2 *src/fnobs.c a6e15b1a21c2354ab211bbcf81424004 *src/fnth_fmedian.cpp 545639488207f3587fbdcca92d151eeb *src/fprod.cpp 5fb2b80b661e051212516871490d081c *src/fscale.cpp f846c25961ee05682f1879f1230fbe80 *src/fsum.c 1391629bd803258d81429f1e561e30ae *src/fvar_fsd.cpp edbf1d850db9ad9c18e6fdb883912809 *src/handle_attributes.c 3bc63e1e7c37afec9d124c4160ca398c *src/kit.h 65abc9f11bbb1a5f3d9fa2b347fe6a0b *src/kit_dup.c 0e6239c0fb5f3da430516ea7c921d5b4 *src/mrtl_mctl.cpp c8e1f01967ab74a7ba11c24796544713 *src/psmat.cpp 2bb0170400947aeaef0ffb86bea0b78a *src/pwnobs.cpp 627486e2efb835c811f1dc5a374215df *src/qF_qG.cpp 5fbd1391bc25a718b3481fa03fb4989f *src/seqid_groupid.cpp 891d03f911bb93be14c87c0c71d43f83 *src/small_helper.c 2d56b31f6f138ca3c90bd08ff7ddb4fe *src/stats_mAR.c 1d321c8f41669b6a32a9263d06a369cd *src/stats_pacf.c 02108dfc3a235c06ad970704ccd5d7c6 *src/varying.cpp 19a55251d627da27fa11def2a1aa6794 *tests/testthat.R 69e03134a9784221993a3dcbe962e2ca *tests/testthat/test-BY.R 7b703da8d9c1b708183102624163c64f *tests/testthat/test-GRP.R 743d0721a6056886281f80e887d4fa3f *tests/testthat/test-TRA.R 3183d7e0d12484522658a4ec799d52f5 *tests/testthat/test-attribute-handling.R 4c637c8aab1a68d067a565c9c37fb823 *tests/testthat/test-collap.R 420bc7c33377982946db619db23d5738 *tests/testthat/test-dapply.R 28547a3bbe273ed028438a85c36fae4a *tests/testthat/test-data.table.R fd953ebb221e2102e6b766143b8083a0 *tests/testthat/test-fHDbetween-fHDwithin-HDB-HDW.R 80627596b964d8374aca5f91c603051e *tests/testthat/test-fbetween-fwithin-B-W.R 1a246cef60f6eed9c83b6faa071ac019 *tests/testthat/test-fcumsum.R 815590a3164f65ba9e23730d1d243947 *tests/testthat/test-fdiff-fgrowth-D-G.R accbb1412fcb6393ed6d7a02d52f2e46 *tests/testthat/test-ffirst-flast.R 79c17993b68accd575c84adfceeb2486 *tests/testthat/test-flag-L-F.R 4326335faf9dcbd362e2179c4477f0f2 *tests/testthat/test-flm-fFtest.R 0c536afeeb513d8a4499c7970a6d01c5 *tests/testthat/test-fmean.R d940f941ce5112113f9aa7de62e20077 *tests/testthat/test-fmedian.R 7b62898b7dabd8b71255ef32a25e257d *tests/testthat/test-fmin-fmax.R 2904f296d9f607e779b3c65d9323efab *tests/testthat/test-fmode.R 98f26ea7ccfa79bdc8d5bdbc41e2b0ab *tests/testthat/test-fnobs-fndistinct.R 7a65ac182bec98256013b5eefd377f47 *tests/testthat/test-fprod.R 33798402b615991daad92c0d8249a2db *tests/testthat/test-fscale-STD.R 1d75a1ed6dc7abfaa5a163b0d09b9d39 *tests/testthat/test-fsubset-ftransform.R ca446b29af2ec8f4e489529fbc99c011 *tests/testthat/test-fsum.R 85a768e6259437cb0ef30d8a4a79d8ab *tests/testthat/test-fvar-fsd.R 6133f1036eb161ccfb8c8f46724ab8e4 *tests/testthat/test-list-processing.R 174bb7b3a0e0e512d7d4a252935e9b3c *tests/testthat/test-misc.R 14bd2d732caa37d919272531f109dee2 *tests/testthat/test-miscellaneous-issues.R 9dbd44145f974a073ca19ff37c554f8e *tests/testthat/test-psmat-psacf.R b557073b369a4d6483cf1b37daf6e76c *tests/testthat/test-qsu.R 451c22a058c23239c1d48a1da2d25304 *tests/testthat/test-quick-conversion.R da2ce7c50d67fa407c791bc619ca040b *tests/testthat/test-recode-replace.R f083528bc9048d6cf7f598bf0bd4e65b *tests/testthat/test-roworder-colorder-rename.R 27cbf17601d3ca4693cc2f87ac5aa133 *tests/testthat/test-select-replace-vars.R 897cfabe82d90df20293396cbd80e65e *tests/testthat/test-seqid-groupid.R c55b480fe473bf92d0da9ec8bba0fb00 *tests/testthat/test-setop.R e955627f8d63c1ea1fcad19368ebf162 *tests/testthat/test-sf.R 027f2525cc6df148e9066325b7dce865 *tests/testthat/test-splitting.R 765307e15065edd4ed7d2de3dc91ef19 *tests/testthat/test-varying.R 7bb3bf5c4c7803fe0733efb386824f87 *tests/testthat/test-whichv.R 0279de8758286cd2403ada8028a14d7c *vignettes/collapse_documentation.Rmd collapse/inst/0000755000176200001440000000000014201327662013034 5ustar liggesuserscollapse/inst/doc/0000755000176200001440000000000014201327662013601 5ustar liggesuserscollapse/inst/doc/collapse_documentation.Rmd0000644000176200001440000001067014174223734021010 0ustar liggesusers--- title: "*collapse* Documentation and Resources" author: "Sebastian Krantz" date: "`r Sys.Date()`" output: rmarkdown::html_vignette: vignette: > %\VignetteIndexEntry{collapse Documentation and Resources} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- *collapse* is a C/C++ based package for data transformation and statistical computing in R. It's aims are: 1. To facilitate complex data transformation, exploration and computing tasks in R. 2. To help make R code fast, flexible, parsimonious and programmer friendly. Documentation comes in 4 different forms: ## Built-In Structured Documentation After installing *collapse*, you can call `help("collapse-documentation")` which will produce a central help page providing a broad overview of the entire functionality of the package, including direct links to all function documentation pages and links to 13 further topical documentation pages describing how clusters of related functions work together. The names of these additional help pages are contained in a global macro `.COLLAPSE_TOPICS` and can so easily be called from the R console as well. Function documentation is interlinked with the relevant topical pages, and all documentation pages link back to the central overview page at `help("collapse-documentation")`. Thus *collapse* comes with a fully structured hierarchical documentation which you can browse within R - and that provides everything necessary to fully understand the package. The Documentation is also available [online](). The package page under `help("collapse-package")` additionally provides some more general information about the package and its design philosophy, as well as a very compact set of examples covering important functionality (which lack features introduced in 1.7 though). Reading `help("collapse-package")` and `help("collapse-documentation")` and working through the examples on `help("collapse-package")` is the fastest way to get acquainted with the package. `help("collapse-documentation")` is also the most up-to-date documentation of the package at the time of releasing 1.7 (January 2022). ## Vignettes There are also 5 vignettes which are available [online]() (due to their size and the enhanced browsing experience on the website). The vignettes are: * [**Introduction to *collapse* **](): Introduces all main features in a structured way * [***collapse* and *dplyr* **](): Demonstrates the integration of collapse with *dplyr* / *tidyverse* workflows and associated performance improvements * [***collapse* and *plm***](): Demonstrates the integration of collapse with *plm* and shows examples of efficient programming with panel data * [***collapse* and *data.table***](): Shows how collapse and *data.table* may be used together in a harmonious way * [***collapse* and *sf***](): Shows how collapse can be used to efficiently manipulate *sf* data frames Note that these vignettes currently (January 2022) do not cover features introduced in version 1.7. They have been updated if you see a 2022 in the date of the vignette. ## Blog I maintain a [blog]() linked to [Rbloggers.com]() where I introduced *collapse* with some compact posts covering central functionality. Among these, the post about [programming with *collapse*]() is highly recommended for ambitious users and developers willing to build on *collapse*, as it exposes to some degree how central parts of *collapse* work together and provides tips on how to write very efficient *collapse* code. Future blog posts will expose some specialized functionality in more detail. ## Cheatsheet Finally, there is a [cheatsheet]() at Rstudio that compactly summarizes the collapse function space, similar to `help("collapse-documentation")`. This one will be updated shortly. collapse/inst/doc/collapse_documentation.html0000644000176200001440000003017214201327662021225 0ustar liggesusers collapse Documentation and Resources

collapse Documentation and Resources

Sebastian Krantz

2022-02-11

collapse is a C/C++ based package for data transformation and statistical computing in R. It’s aims are:

  1. To facilitate complex data transformation, exploration and computing tasks in R.
  2. To help make R code fast, flexible, parsimonious and programmer friendly.

Documentation comes in 4 different forms:

Vignettes

There are also 5 vignettes which are available online (due to their size and the enhanced browsing experience on the website). The vignettes are:

  • Introduction to collapse : Introduces all main features in a structured way

  • collapse and dplyr : Demonstrates the integration of collapse with dplyr / tidyverse workflows and associated performance improvements

  • collapse and plm: Demonstrates the integration of collapse with plm and shows examples of efficient programming with panel data

  • collapse and data.table: Shows how collapse and data.table may be used together in a harmonious way

  • collapse and sf: Shows how collapse can be used to efficiently manipulate sf data frames

Note that these vignettes currently (January 2022) do not cover features introduced in version 1.7. They have been updated if you see a 2022 in the date of the vignette.

Blog

I maintain a blog linked to Rbloggers.com where I introduced collapse with some compact posts covering central functionality. Among these, the post about programming with collapse is highly recommended for ambitious users and developers willing to build on collapse, as it exposes to some degree how central parts of collapse work together and provides tips on how to write very efficient collapse code. Future blog posts will expose some specialized functionality in more detail.

Cheatsheet

Finally, there is a cheatsheet at Rstudio that compactly summarizes the collapse function space, similar to help("collapse-documentation"). This one will be updated shortly.

Built-In Structured Documentation

After installing collapse, you can call help("collapse-documentation") which will produce a central help page providing a broad overview of the entire functionality of the package, including direct links to all function documentation pages and links to 13 further topical documentation pages describing how clusters of related functions work together. The names of these additional help pages are contained in a global macro .COLLAPSE_TOPICS and can so easily be called from the R console as well. Function documentation is interlinked with the relevant topical pages, and all documentation pages link back to the central overview page at help("collapse-documentation").

Thus collapse comes with a fully structured hierarchical documentation which you can browse within R - and that provides everything necessary to fully understand the package. The Documentation is also available online.

The package page under help("collapse-package") additionally provides some more general information about the package and its design philosophy, as well as a very compact set of examples covering important functionality (which lack features introduced in 1.7 though).

Reading help("collapse-package") and help("collapse-documentation") and working through the examples on help("collapse-package") is the fastest way to get acquainted with the package. help("collapse-documentation") is also the most up-to-date documentation of the package at the time of releasing 1.7 (January 2022).