cpp11/0000755000175000017500000000000014151365154011335 5ustar nileshnileshcpp11/MD50000644000175000017500000001127414151365154011652 0ustar nileshnilesh48ee8ed6a5ae5ae43ad30a278a8bbf0b *DESCRIPTION 5174dfc514f0941d2edd4b0b4c9941dd *LICENSE 3a9298876794331325c170d77feb1164 *NAMESPACE efef50e9187c790d233dd481a542259c *NEWS.md b4fe12876c37f8a22cde06e9701eecf0 *R/cpp11-package.R 226e91d15c31e8053239e05b88813f5c *R/knitr.R 41afd7ca59ec2de3dcd88b085efab99b *R/register.R 70bd4962478b225f5595d3afda019fcc *R/source.R 868737942d1e7242211ce46fc8a431ca *R/utils.R d1940a957980425e9ca4abe7332bd9d2 *R/vendor.R 95e2bfaf26ccf57087023a9537a94cd9 *R/zzz.R 06f3572dac031af8ae4d4ffe58ca5ee1 *README.md 0430c893fae0637d2c1d8c75d1244f91 *build/vignette.rds 5a16153258a8f58d60845244595c70f2 *inst/doc/FAQ.R d6ad43e69c99c9908046e8940e512d74 *inst/doc/FAQ.Rmd 6916205d6d34fa1b41d5b5d85a513852 *inst/doc/FAQ.html a6ff177da15857d4cc52737a66dcbf8a *inst/doc/converting.R caa747c1da44815ec50f3dc55187dd90 *inst/doc/converting.Rmd bc8bf131d4bd1bc70c20a7c9aeeac60e *inst/doc/converting.html 3e58c0b85862756107c471e5a1378a04 *inst/doc/cpp11.R a6b8148087dfa1d47c06496206322ffd *inst/doc/cpp11.Rmd 11f773e64cf1505c25d7ed51ef226e11 *inst/doc/cpp11.html 3c9e2d06e8560272e30c98b0c827478b *inst/doc/internals.R 394717ccab71d2c8cd85abd348ba567b *inst/doc/internals.Rmd cdba1b409a7b0a8b2bb17a0691386c68 *inst/doc/internals.html 1dc81cd1cd7899d47e84ae91912cbdd1 *inst/doc/motivations.R f915f9289185d03439ba603994865581 *inst/doc/motivations.Rmd e3143589338f66b31374006ae70046dd *inst/doc/motivations.html 5285ae2dfd6c4f8cc939a40418ddb470 *inst/include/cpp11.hpp 4b5cac8f9dd39ba15f22b4ada435708e *inst/include/cpp11/R.hpp ba408ddc45d62c21c7d763fdc8373335 *inst/include/cpp11/altrep.hpp 5cb7b640644606e9ca7ddfb50383321f *inst/include/cpp11/as.hpp b70ab7b50907184aae72442e2466daba *inst/include/cpp11/attribute_proxy.hpp 52decc239a860f96110d057705625fea *inst/include/cpp11/data_frame.hpp d323258be1bd05a50a4646382d2fca63 
*inst/include/cpp11/declarations.hpp 101d14f02aaff30f11bed5e340185fca *inst/include/cpp11/doubles.hpp ec9d64eb037cf2a26c48949dd1c8f381 *inst/include/cpp11/environment.hpp 4d0b3e7f6e7f003f3dd38e2eea2b1dfa *inst/include/cpp11/external_pointer.hpp 7dff1c4addaa5b35ba6f3ccf18591fa1 *inst/include/cpp11/function.hpp 3999c3b42abeb351b55b4fe453da0552 *inst/include/cpp11/integers.hpp 978b332236a4afd4fddd0e0cf66f8491 *inst/include/cpp11/list.hpp 0445682616be11f7cebb175862bad05a *inst/include/cpp11/list_of.hpp 4135d9c54f7cf5fe8af2506292454ea5 *inst/include/cpp11/logicals.hpp 21297e472e86ba458741f981a9794037 *inst/include/cpp11/matrix.hpp 4194cd645dca505de4ede46e90e2a457 *inst/include/cpp11/named_arg.hpp 4df0d221b8c89a7dda78c0f00af4e7f9 *inst/include/cpp11/protect.hpp 6be54b67912176da13ce70933c171a14 *inst/include/cpp11/r_bool.hpp 8115e7a88015ce9d6426ecde997b8a28 *inst/include/cpp11/r_string.hpp c962f8e5ab087f00304656d78a447d8e *inst/include/cpp11/r_vector.hpp aea633bcbe570c00e68486d35c7ad187 *inst/include/cpp11/raws.hpp 11388d8b414f525baa56e88cc899a199 *inst/include/cpp11/sexp.hpp 7e9b4f2601c8b0e7b484af5d3499add4 *inst/include/cpp11/strings.hpp 8eff5dd4e380f63ebdb50936b407d741 *inst/include/fmt/core.h fc80debc11fb314f5f83f1bfa793a729 *inst/include/fmt/format-inl.h 4bbcd3c6e4a7b8bcd75821d4549cc372 *inst/include/fmt/format.h 8b7ce8c67f1a8a9ba4ca010396888273 *man/cpp11-package.Rd b2eb9fc16adf53535809395b9fbdc7e9 *man/cpp_register.Rd e3109b9a67cd6e05dc9743ceff1ba747 *man/cpp_source.Rd 9447728431148999097e9893e868292b *man/cpp_vendor.Rd 10a2a51a7fbe805d1eabedffedc00923 *tests/testthat.R 92a6a43ee848377faa1d4a4f437ab0c1 *tests/testthat/helper.R e005a75c69d17cf1df1817dd45be64ca *tests/testthat/linking_to_incorrect_registers.cpp e1fd2e7c49d550b7b768515857f72666 *tests/testthat/linking_to_registers.cpp d64ad835cd166014d7c3f70be7de3f3e *tests/testthat/multiple.cpp 30750234f0006b05dc8d530d096f4bd8 *tests/testthat/multiple_incorrect.cpp 4de69715de956a72794f8f2a05225797 
*tests/testthat/single.cpp 682f42a78cb5e1019af7261bb7d0dc25 *tests/testthat/single_error.cpp 78fe5d68f1773f0763c6718e6268961d *tests/testthat/single_incorrect.cpp e93f8023c507727c2f64c957f5dfe58b *tests/testthat/test-knitr.R 3f81230744ed7ae81c5b373eaaba4364 *tests/testthat/test-register.R d03a65b0efa0f206ffcb80b01e1e0d81 *tests/testthat/test-source.R abde45c5cbd2f0b5f07c25e7fc9d8907 *tests/testthat/test-utils.R abb3cf96a4815c4e2ce056d4bf70d76c *tests/testthat/test-vendor.R d6ad43e69c99c9908046e8940e512d74 *vignettes/FAQ.Rmd caa747c1da44815ec50f3dc55187dd90 *vignettes/converting.Rmd a6b8148087dfa1d47c06496206322ffd *vignettes/cpp11.Rmd aa95c3b3f0106872483fe170899c5304 *vignettes/cpp11_faq.html a2568cbb782b68b805c1b757138d9eee *vignettes/growth.Rds 394717ccab71d2c8cd85abd348ba567b *vignettes/internals.Rmd f915f9289185d03439ba603994865581 *vignettes/motivations.Rmd ac8923545624f46bb9a3e64a63c3250d *vignettes/release.Rds e35d25912d1c1d5b1b093a5ca768b45b *vignettes/sum.Rds cpp11/NEWS.md0000644000175000017500000001513714151206323012432 0ustar nileshnilesh# cpp11 0.4.2 * Romain François is now the maintainer. # cpp11 0.4.1 * Fix crash related to unwind protect optimization (#244) # cpp11 0.4.0 ## New Features * New opt-in message formatting with the {fmt} C++ library for `cpp11::messages()` `cpp11::stop()` and `cpp11::warning()`. Set the `CPP11_USE_FMT` macro to use this feature in your package. 
(@sbearrows, #169, #208) * New `as_double()` and `as_integer()` methods to coerce integers to doubles and doubles to integers (@sbearrows, #46) * `cpp11::matrix` iterators can now be used either row-wise or column-wise (the default) depending on the user's choice (@alyst, #229) ## Improvements and fixes * Read-only matrix accessors are now marked const (#234) * `writable::r_vector` default constructors now return a 0 length vector when converted to `SEXP` (#166) * Read-only `r_vector` constructors now disallow implicit construction with named arguments (#237) * Read-only `r_vector.attr()` methods now return const objects, so it is a compile time error to try to assign to them (#237) * Fixed `+` and `+=` operators of `r_vector::[const_]iterator` to conform to the *iterators* concept: `+=` updates the iterator, and `+` returns the updated copy, while keeping the original unchanged (@alyst, #231) * Remove undefined behavior when constructing global `cpp11::sexp`s (#224) * Removed redundant `.Call` calls in cpp11.cpp file (@sbearrows, #170) * Error messages now output original file name rather than the temporary file name (@sbearrows, #194) * `cpp_register()` now includes `attribute_visible` in the init function, so packages compiled with `C_VISIBILITY` will find the init function. * Fixed bug when running `cpp_source()` on the same file more than once (@sbearrows, #202) * Allow cpp11 decorators of the form `cpp11::linking_to` (@sbearrows, #193) * Removed internal instances of `cpp11::stop()` and replaced with C++ exceptions (@sbearrows, #203) * Names of named lists are now resized along with the list elements (@sbearrows, #206) # cpp11 0.3.1 * Fix stringop-truncation warning from generated wrapping code.
# cpp11 0.3.0 ## New functions and features * New `x.empty()` method to check if a vector is empty (@sbearrows, #182) * New `x.named()` method to check if a vector is named (@sbearrows, #186) * New `na()` free function to return the NA sentinels for R objects (@sbearrows, #179) ## Major fixes * Memory no longer inadvertently leaks when move constructing vectors (#173) ## Minor improvements and fixes * Incorrectly formatted cpp11 decorators now output a more informative error message (@sbearrows, #127) * Generated registration code now uses C collation to avoid spurious changes from `tools::package_native_routine_registration_skeleton()` (@sbearrows, #171) * Makevars files which include filenames now handle spaces in paths properly (@klmr, #160) # cpp11 0.2.7 * Fix a transient memory leak for functions that return values from `cpp11::unwind_protect()` and `cpp11::safe` (#154) * `cpp_source()` now gets an argument `dir` to allow customized temporary directory to store generated source files. It makes it easier to debug C++ source files in non-package project via source mapping. (@renkun-ken, #156) # cpp11 0.2.6 * `cpp_register()` now uses symbols exclusively in the `.Call()` interface. This allows it to be more robust in interactive use with the pkgload package. # cpp11 0.2.5 * `cpp_source()` gains a `cxx_std` argument to control which C++ standard is used. This allows you to use code from `C++14` and later standards with cpp_source(). (#100) * The cpp11 knitr engine now allows you to set the `cxx_std` chunk option to control the C++ standard used. * `cpp_source()` now has much more informative error messages when compilation fails (#125, #139) * `cpp_source()` now uses a unique name for the DLL, so works when run multiple times on the same source file on Windows (#143) * `writable::list_of` now supports modification of vectors as intended (#131). 
* Errors when running `tools::package_native_routine_registration_skeleton()` are no longer swallowed (#134) * `cpp_source()` can now accept a source file called `cpp11.cpp` (#133) * `named_arg` now explicitly protect their values, avoiding protection issues when using large inputs. [tidyverse/readr#1145](https://github.com/tidyverse/readr/issues/1145) * `r_string(std::string)` now uses `Rf_mkCharLenCE()` instead of `Rf_mkChar()`, which avoids the performance cost of checking the string length. * Writable vector classes now properly set their lengths as intended when being copied to a read only class (#128). # cpp11 0.2.4 * The preserve list is now more robust to invalid values, such as when the XPtr has no address or if non-xptr's are stored in the option. This fixes errors when reloading packages using cpp11 and RStudio's session restores. * The preserve list is now more robust to invalid values, such as null pointers when the XPtr is serialized. This situation occurs during 'Install and Restart' in RStudio (#121) # cpp11 0.2.3 * `r_vector::const_iterator::operator*` is now a const method (#113, @bkietz, @xhochy) * The preserve list is now stored in an XPtr, rather than an environment, to avoid issues when serializing the preserve environment, which happens implicitly when RStudio or RStudio Cloud saves all options when resuming a session (#116) # cpp11 0.2.2 * `r_bool` added as an adapter between `bool` and `Rboolean` values (#57, @bkietz) * `data_frame()` objects now have the number of rows correctly set as real length, not the reserved length (#91) * Fixed potential memory leak in cpp11::writable classes. 
# cpp11 0.2.1 * Ensures backwards compatibility with code generation from cpp11 0.1.0 (#88) * `push_back()` now works more consistently with named arguments (#86) # cpp11 0.2.0 ## New features * cpp11 is now able to compile on gcc 4.8.5 (#69, @bkietz) * `cpp_source()`, `cpp_function()` and `cpp_eval()` now support `[[cpp11::linking_to()]]` syntax to link to third party packages with C++ headers. (#48) ## Minor improvements and fixes * `as_cpp()` now works with enumeration types (#52, @bkietz) * `as_cpp<int>()` and `as_cpp<double>()` now implicitly coerce between all 3 types of single NA values (#53). * `list::const_iterator::operator*()` added so iterators could be used on list objects (#60, @romainfrancois) * `safe[]` can now work with functions that return any type (#70, @bkietz) * The `END_CPP` macro now includes a `catch(...)` block to catch all C++ exceptions that do not inherit from `std::exception` (#47). * Improve consistency of inserting NA values in r_string objects (#45) * Added a `NEWS.md` file to track changes to the package. # cpp11 0.1.0 * Initial release cpp11/DESCRIPTION0000644000175000017500000000321314151365154013042 0ustar nileshnileshPackage: cpp11 Title: A C++11 Interface for R's C Interface Version: 0.4.2 Authors@R: c(person(given = "Jim", family = "Hester", role = "aut", comment = c(ORCID = "0000-0002-2739-7082")), person(given = "Romain", family = "François", role = c("aut", "cre"), email = "romain@rstudio.com"), person(given = "Benjamin", family = "Kietzman", role = "ctb"), person(given = "RStudio", role = c("cph", "fnd"))) Description: Provides a header only, C++11 interface to R's C interface. Compared to other approaches 'cpp11' strives to be safe against long jumps from the C API as well as C++ exceptions, conform to normal R function semantics and supports interaction with 'ALTREP' vectors.
License: MIT + file LICENSE URL: https://cpp11.r-lib.org, https://github.com/r-lib/cpp11 BugReports: https://github.com/r-lib/cpp11/issues Suggests: bench, brio, callr, cli, covr, decor, desc, ggplot2, glue, knitr, lobstr, mockery, progress, rmarkdown, scales, Rcpp, testthat, tibble, utils, vctrs, withr VignetteBuilder: knitr Config/testthat/edition: 3 Config/Needs/cpp11/cpp_register: brio, cli, decor, desc, glue, tibble, vctrs Encoding: UTF-8 RoxygenNote: 7.1.2 SystemRequirements: C++11 NeedsCompilation: no Packaged: 2021-11-29 17:26:06 UTC; jhester Author: Jim Hester [aut] (), Romain François [aut, cre], Benjamin Kietzman [ctb], RStudio [cph, fnd] Maintainer: Romain François Repository: CRAN Date/Publication: 2021-11-30 09:10:04 UTC cpp11/README.md0000644000175000017500000001205414140523457012616 0ustar nileshnilesh# cpp11 [![R-CMD-check](https://github.com/r-lib/cpp11/workflows/R-CMD-check/badge.svg)](https://github.com/r-lib/cpp11/actions) [![codecov](https://app.codecov.io/gh/r-lib/cpp11/branch/main/graph/badge.svg?token=EEWYoCYxQ2)](https://app.codecov.io/gh/r-lib/cpp11) [![Lifecycle: stable](https://img.shields.io/badge/lifecycle-stable-brightgreen.svg)](https://lifecycle.r-lib.org/articles/stages.html#stable) [![CRAN status](https://www.r-pkg.org/badges/version/cpp11)](https://CRAN.R-project.org/package=cpp11) cpp11 helps you to interact with R objects using C++ code. Its goals and syntax are similar to the excellent [Rcpp](https://cran.r-project.org/package=Rcpp) package. ## Using cpp11 in a package To add cpp11 to an existing package, put your C++ files in the `src/` directory and add the following to your DESCRIPTION file: ``` LinkingTo: cpp11 SystemRequirements: C++11 ``` Then decorate C++ functions you want to expose to R with `[[cpp11::register]]`. 
*Note that this is a [C++11 attribute](https://en.cppreference.com/w/cpp/language/attributes), not a comment as is used in Rcpp.* cpp11 is a header only library with no hard dependencies and does not use a shared library, so it is straightforward and reliable to use in packages without fear of compile-time and run-time mismatches. Alternatively, you can [vendor](https://cpp11.r-lib.org/articles/motivations.html#vendoring) the currently installed version of cpp11 headers into your package with `cpp11::cpp_vendor()`. This ensures the headers will remain unchanged until you explicitly update them. ## Getting started See [vignette("cpp11")](https://cpp11.r-lib.org/articles/cpp11.html) to get started using cpp11 in your scripts, particularly if you are new to C++ programming. ## Getting help [![RStudio community](https://img.shields.io/badge/community-cpp11-blue?style=social&logo=rstudio&logoColor=75AADB)](https://community.rstudio.com/new-topic?category=Package%20development&tags=cpp11) [RStudio community](https://community.rstudio.com/new-topic?category=Package%20development&tags=cpp11) with the `cpp11` tag is the best place to ask for help using cpp11 or interfacing C++ with R. ## Motivations [Rcpp](https://cran.r-project.org/package=Rcpp) has been a widely successful project, however over the years a number of issues and additional C++ features have arisen. Adding these features to Rcpp would require a great deal of work, or in some cases would be impossible without severely breaking backwards compatibility. **cpp11** is a ground up rewrite of C++ bindings to R with different design trade-offs and features. Changes that motivated cpp11 include: - Enforcing [copy-on-write semantics](https://cpp11.r-lib.org/articles/motivations.html#copy-on-write-semantics). - Improving the [safety](https://cpp11.r-lib.org/articles/motivations.html#improve-safety) of using the R API from C++ code.
- Supporting [ALTREP objects](https://cpp11.r-lib.org/articles/motivations.html#altrep-support). - Using [UTF-8 strings](https://cpp11.r-lib.org/articles/motivations.html#utf-8-everywhere) everywhere. - Applying newer [C++11 features](https://cpp11.r-lib.org/articles/motivations.html#c11-features). - Having a more straightforward, [simpler implementation](https://cpp11.r-lib.org/articles/motivations.html#simpler-implementation). - Faster [compilation time](https://cpp11.r-lib.org/articles/motivations.html#compilation-speed) with lower memory requirements. - Being *completely* [header only](https://cpp11.r-lib.org/articles/motivations.html#header-only) to avoid ABI issues. - Capable of [vendoring](https://cpp11.r-lib.org/articles/motivations.html#vendoring) if desired. - More robust [protection](https://cpp11.r-lib.org/articles/motivations.html#protection) using a much more efficient linked list data structure. - [Growing vectors](https://cpp11.r-lib.org/articles/motivations.html#growing-vectors) more efficiently. See [vignette("motivations")](https://cpp11.r-lib.org/articles/motivations.html) for full details on the motivations for writing cpp11. ## Conversion from Rcpp See [vignette("converting")](https://cpp11.r-lib.org/articles/converting.html) if you are already familiar with Rcpp or have an existing package that uses Rcpp and want to convert it to use cpp11. ## Learning More - [Welding R and C++](https://www.youtube.com/watch?v=_kq0N0FNIjA) - Presentation at SatRday Columbus [(slides)](https://speakerdeck.com/jimhester/cpp11-welding-r-and-c-plus-plus) ## Internals See [vignette("internals")](https://cpp11.r-lib.org/articles/internals.html) for details on the cpp11 implementation or if you would like to contribute to cpp11. ## Code of Conduct Please note that the cpp11 project is released with a [Contributor Code of Conduct](https://cpp11.r-lib.org/CODE_OF_CONDUCT.html). By contributing to this project, you agree to abide by its terms. 
## Thanks cpp11 would not exist without Rcpp. Thanks to the Rcpp authors, Dirk Eddelbuettel, Romain Francois, JJ Allaire, Kevin Ushey, Qiang Kou, Nathan Russell, Douglas Bates and John Chambers for their work writing and maintaining Rcpp. cpp11/man/0000755000175000017500000000000014151206456012107 5ustar nileshnileshcpp11/man/cpp_vendor.Rd0000644000175000017500000000240614071114152014527 0ustar nileshnilesh% Generated by roxygen2: do not edit by hand % Please edit documentation in R/vendor.R \name{cpp_vendor} \alias{cpp_vendor} \title{Vendor the cpp11 dependency} \usage{ cpp_vendor(path = ".") } \arguments{ \item{path}{The path to the package root directory} } \value{ The file path to the vendored code (invisibly). } \description{ Vendoring is the act of making your own copy of the 3rd party packages your project is using. It is often used in the go language community. } \details{ This function vendors cpp11 into your package by copying the cpp11 headers into the \code{inst/include} folder of your package and adding 'cpp11 version: XYZ' to the top of the files, where XYZ is the version of cpp11 currently installed on your machine. If you choose to vendor the headers you should \emph{remove} \code{LinkingTo: cpp11} from your DESCRIPTION. \strong{Note}: vendoring places the responsibility of updating the code on \strong{you}. Bugfixes and new features in cpp11 will not be available for your code until you run \code{cpp_vendor()} again.
} \examples{ # create a new directory dir <- tempfile() dir.create(dir) # vendor the cpp11 headers into the directory cpp_vendor(dir) list.files(file.path(dir, "inst", "include", "cpp11")) # cleanup unlink(dir, recursive = TRUE) } cpp11/man/cpp11-package.Rd0000644000175000017500000000174614151200307014710 0ustar nileshnilesh% Generated by roxygen2: do not edit by hand % Please edit documentation in R/cpp11-package.R \docType{package} \name{cpp11-package} \alias{cpp11} \alias{cpp11-package} \title{cpp11: A C++11 Interface for R's C Interface} \description{ Provides a header only, C++11 interface to R's C interface. Compared to other approaches 'cpp11' strives to be safe against long jumps from the C API as well as C++ exceptions, conform to normal R function semantics and supports interaction with 'ALTREP' vectors. } \seealso{ Useful links: \itemize{ \item \url{https://cpp11.r-lib.org} \item \url{https://github.com/r-lib/cpp11} \item Report bugs at \url{https://github.com/r-lib/cpp11/issues} } } \author{ \strong{Maintainer}: Romain François \email{romain@rstudio.com} Authors: \itemize{ \item Jim Hester (\href{https://orcid.org/0000-0002-2739-7082}{ORCID}) } Other contributors: \itemize{ \item Benjamin Kietzman [contributor] \item RStudio [copyright holder, funder] } } \keyword{internal} cpp11/man/cpp_register.Rd0000644000175000017500000000305714120423710015057 0ustar nileshnilesh% Generated by roxygen2: do not edit by hand % Please edit documentation in R/register.R \name{cpp_register} \alias{cpp_register} \title{Generates wrappers for registered C++ functions} \usage{ cpp_register(path = ".", quiet = FALSE) } \arguments{ \item{path}{The path to the package root directory} \item{quiet}{If \code{TRUE} suppresses output from this function} } \value{ The paths to the generated R and C++ source files (in that order). 
} \description{ Functions decorated with \verb{[[cpp11::register]]} in files ending in \code{.cc}, \code{.cpp}, \code{.h} or \code{.hpp} will be wrapped in generated code and registered to be called from R. } \details{ Note registered functions will not be \emph{exported} from your package unless you also add a \verb{@export} roxygen2 directive for them. In order to use \code{cpp_register()} the \code{cli}, \code{decor}, \code{desc}, \code{glue}, \code{tibble} and \code{vctrs} packages must also be installed. } \examples{ # create a minimal package dir <- tempfile() dir.create(dir) writeLines("Package: testPkg", file.path(dir, "DESCRIPTION")) writeLines("useDynLib(testPkg, .registration = TRUE)", file.path(dir, "NAMESPACE")) # create a C++ file with a decorated function dir.create(file.path(dir, "src")) writeLines("[[cpp11::register]] int one() { return 1; }", file.path(dir, "src", "one.cpp")) # register the functions in the package cpp_register(dir) # Files generated by registration file.exists(file.path(dir, "R", "cpp11.R")) file.exists(file.path(dir, "src", "cpp11.cpp")) # cleanup unlink(dir, recursive = TRUE) } cpp11/man/cpp_source.Rd0000644000175000017500000000513214134540114014532 0ustar nileshnilesh% Generated by roxygen2: do not edit by hand % Please edit documentation in R/source.R \name{cpp_source} \alias{cpp_source} \alias{cpp_function} \alias{cpp_eval} \title{Compile C++ code} \usage{ cpp_source( file, code = NULL, env = parent.frame(), clean = TRUE, quiet = TRUE, cxx_std = Sys.getenv("CXX_STD", "CXX11"), dir = tempfile() ) cpp_function( code, env = parent.frame(), clean = TRUE, quiet = TRUE, cxx_std = Sys.getenv("CXX_STD", "CXX11") ) cpp_eval( code, env = parent.frame(), clean = TRUE, quiet = TRUE, cxx_std = Sys.getenv("CXX_STD", "CXX11") ) } \arguments{ \item{file}{A file containing C++ code to compile} \item{code}{If non-null, the C++ code to compile} \item{env}{The R environment where the R wrapping functions should be defined.} \item{clean}{If 
\code{TRUE}, clean up the files after sourcing} \item{quiet}{If \code{TRUE}, do not show compiler output} \item{cxx_std}{The C++ standard to use, the \code{CXX_STD} make macro is set to this value. The default value queries the \code{CXX_STD} environment variable, or uses 'CXX11' if unset.} \item{dir}{The directory to store the generated source files. \code{tempfile()} is used by default. The directory will be removed if \code{clean} is \code{TRUE}.} } \value{ For \code{\link[=cpp_source]{cpp_source()}} and \code{\link[=cpp_function]{cpp_function()}} the results of \code{\link[=dyn.load]{dyn.load()}} (invisibly). For \code{\link[=cpp_eval]{cpp_eval()}} the results of the evaluated expression. } \description{ \code{\link[=cpp_source]{cpp_source()}} compiles and loads a single C++ file for use in R. \code{\link[=cpp_function]{cpp_function()}} compiles and loads a single function for use in R. \code{\link[=cpp_eval]{cpp_eval()}} evaluates a single C++ expression and returns the result. } \details{ Within C++ code you can use \verb{[[cpp11::linking_to("pkgxyz")]]} to link to external packages. This is equivalent to putting those packages in the \code{LinkingTo} field in a package DESCRIPTION.
} \examples{ cpp_source( code = '#include "cpp11/integers.hpp" [[cpp11::register]] int num_odd(cpp11::integers x) { int total = 0; for (int val : x) { if ((val \% 2) == 1) { ++total; } } return total; } ') num_odd(as.integer(c(1:10, 15, 23))) if (interactive() && require("progress")) { cpp_source( code = ' #include #include [[cpp11::linking_to("progress")]] [[cpp11::register]] void show_progress() { RProgress::RProgress pb("Processing [:bar] ETA: :eta"); pb.tick(0); for (int i = 0; i < 100; i++) { usleep(2.0 / 100 * 1000000); pb.tick(); } } ') show_progress() } } cpp11/vignettes/0000755000175000017500000000000014151206456013344 5ustar nileshnileshcpp11/vignettes/growth.Rds0000644000175000017500000000071214071114152015320 0ustar nileshnileshb```b`@YH1`(  # L;4d X@ N ͚\P`hH{KG26v9 v7@KA}n ԧ- ]~7SY~J?S']y;rl@2 FrJJKΈ/Me&`.f@p{̐s` AjA=+@d@.fHn#*KRB VRGмSMʹ!Th^a.;t"P~8KM T9'5,N1s3a99pc3K`i-/>=΢r=] 25F rtX a+IʉOIȃ)MI,IK+ fcpp11/vignettes/converting.Rmd0000644000175000017500000002324214120423440016157 0ustar nileshnilesh--- title: "Converting from Rcpp" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Converting from Rcpp} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) should_run_benchmarks <- function(x) { get("requireNamespace")("cpp11test", quietly = TRUE) && asNamespace("cpp11test")$should_run_benchmarks() } ``` In many cases there is no need to convert a package from Rcpp. If the code is already written and you don't have a very compelling need to use cpp11 I would recommend you continue to use Rcpp. However if you _do_ feel like your project will benefit from using cpp11 this vignette will provide some guidance and doing the conversion. It is also a place to highlight some of the largest differences between Rcpp and cpp11. 
## Class comparison table | Rcpp | cpp11 (read-only) | cpp11 (writable) | cpp11 header | | --- | --- | --- | --- | | NumericVector | doubles | writable::doubles | | | IntegerVector | integers | writable::integers | | | CharacterVector | strings | writable::strings | | | RawVector | raws | writable::raws | | | List | list | writable::list | | | RObject | sexp | | | | XPtr | | external_pointer | | | Environment | | environment | | | Function | | function | | | Environment (namespace) | | package | | | wrap | | as_sexp | | | as | | as_cpp | | | stop | stop | | | | checkUserInterrupt | check_user_interrupt | | | ## Incomplete list of Rcpp features not included in cpp11 - None of [Modules](https://CRAN.R-project.org/package=Rcpp/vignettes/Rcpp-modules.pdf) - None of [Sugar](https://CRAN.R-project.org/package=Rcpp/vignettes/Rcpp-sugar.pdf) - Some parts of [Attributes](https://CRAN.R-project.org/package=Rcpp/vignettes/Rcpp-attributes.pdf) - No dependencies - No random number generator restoration - No support for roxygen2 comments - No interfaces ## Read-only vs writable vectors The largest difference between cpp11 and Rcpp classes is that Rcpp classes modify their data in place, whereas cpp11 classes require copying the data to a writable class for modification. The default classes, e.g. `cpp11::doubles` are *read-only* classes that do not permit modification. If you want to modify the data you need to use the classes in the `cpp11::writable` namespace, e.g. `cpp11::writable::doubles`. In addition use the `writable` variants if you need to create a new R vector entirely in C++. ## Fewer implicit conversions Rcpp also allows very flexible implicit conversions, e.g. if you pass a `REALSXP` to a function that takes a `Rcpp::IntegerVector()` it is implicitly converted to a `INTSXP`. These conversions are nice for usability, but require (implicit) duplication of the data, with the associated runtime costs. cpp11 throws an error in these cases. 
If you want the implicit coercions you can add a call to `as.integer()` or `as.double()` as appropriate from R when you call the function. ## Calling R functions from C++ Calling R functions from C++ is similar to using Rcpp. ```c++ Rcpp::Function as_tibble("as_tibble", Rcpp::Environment::namespace_env("tibble")); as_tibble(x, Rcpp::Named(".rows", num_rows), Rcpp::Named(".name_repair", name_repair)); ``` ```c++ using namespace cpp11::literals; // so we can use ""_nm syntax auto as_tibble = cpp11::package("tibble")["as_tibble"]; as_tibble(x, ".rows"_nm = num_rows, ".name_repair"_nm = name_repair); ``` ## Appending behavior One major difference between Rcpp and cpp11 is how vectors are grown. Rcpp vectors have a `push_back()` method, but unlike `std::vector()` no additional space is reserved when pushing. This makes calling `push_back()` repeatedly very expensive, as the entire vector has to be copied each call. In contrast `cpp11` vectors grow efficiently, reserving extra space. Because of this you can do ~10,000,000 vector appends with cpp11 in approximately the same amount of time that Rcpp does 10,000, as this benchmark demonstrates.
```{r, message = FALSE, eval = should_run_benchmarks()} library(cpp11test) grid <- expand.grid(len = 10 ^ (0:7), pkg = "cpp11", stringsAsFactors = FALSE) grid <- rbind( grid, expand.grid(len = 10 ^ (0:4), pkg = "rcpp", stringsAsFactors = FALSE) ) b_grow <- bench::press(.grid = grid, { fun = match.fun(sprintf("%sgrow_", ifelse(pkg == "cpp11", "", paste0(pkg, "_")))) bench::mark( fun(len) ) } )[c("len", "pkg", "min", "mem_alloc", "n_itr", "n_gc")] saveRDS(b_grow, "growth.Rds", version = 2) ``` ```{r, echo = FALSE, dev = "svg", fig.ext = "svg", eval = capabilities("cairo")} b_grow <- readRDS("growth.Rds") library(ggplot2) ggplot(b_grow, aes(x = len, y = min, color = pkg)) + geom_point() + geom_line() + bench::scale_y_bench_time() + scale_x_log10( breaks = scales::trans_breaks("log10", function(x) 10^x), labels = scales::trans_format("log10", scales::math_format(10^.x)) ) + coord_fixed() + theme(panel.grid.minor = element_blank()) + labs(title = "log-log plot of vector size vs construction time", x = NULL, y = NULL) ``` ```{r, echo = FALSE} knitr::kable(b_grow) ``` ## Random Number behavior Rcpp unconditionally includes calls to `GetRNGstate()` and `PutRNGstate()` before each wrapped function. This ensures that if any C++ code calls the R API functions `unif_rand()`, `norm_rand()`, `exp_rand()` or `R_unif_index()` the random seed state is set accordingly. cpp11 does _not_ do this, so you must include the calls to `GetRNGstate()` and `PutRNGstate()` _yourself_ if you use any of those functions in your C++ code. See [R-exts 6.3 - Random number generation](https://cran.r-project.org/doc/manuals/r-release/R-exts.html#Random-numbers) for details on these functions. One convenient way to do this safely is to use a simple class: ```cpp class local_rng { public: local_rng() { GetRNGstate(); } ~local_rng(){ PutRNGstate(); } }; void foo() { local_rng rng_state; /* my code using the RNG */ } ``` ## Mechanics of converting a package from Rcpp 1. Add cpp11 to `LinkingTo` 1.
Add C++11 to `SystemRequirements` 1. Convert all instances of `// [[Rcpp::export]]` to `[[cpp11::register]]` 1. Clean and recompile the package, e.g. `pkgbuild::clean_dll()` `pkgload::load_all()` 1. Run tests `devtools::test()` 1. Start converting function by function - Remember you can usually inter-convert between cpp11 and Rcpp classes by going through `SEXP` if needed. - Converting the code a bit at a time (and regularly running your tests) is the best way to do the conversion correctly and make progress. - Doing a separate commit after converting each file (or possibly each function) can make finding any regressions with [git bisect](https://youtu.be/KKeucpfAuuA) much easier in the future. ## Common issues when converting ### STL includes Rcpp.h includes a number of STL headers automatically, notably `<string>` and `<iostream>`, however the cpp11 headers generally do not. If you have errors like > error: no type named 'string' in namespace 'std' You will need to include the appropriate STL header, in this case `<string>`. ### R API includes cpp11 conflicts with macros declared by some R headers unless the macros `R_NO_REMAP` and `STRICT_R_HEADERS` are defined. If you include `cpp11/R.hpp` before any R headers these macros will be defined appropriately, otherwise you may see errors like > R headers were included before cpp11 headers and at least one of R_NO_REMAP or STRICT_R_HEADERS was not defined. Which indicates that you must either change your include order or add preprocessor definitions for `R_NO_REMAP` and `STRICT_R_HEADERS`. Note that transitive includes of R headers (for example, those included by `Rcpp.h`) can also introduce the conflicting macros. ### Type aliases If you use typedefs for cpp11 types or define custom types you will need to define them in a `pkgname_types.hpp` file so that `cpp_register()` can include it in the generated code.
### `cpp11::stop()` and `cpp11::warning()` with `std::string` `cpp11::stop()` and `cpp11::warning()` are thin wrappers around `Rf_stop()` and `Rf_warning()`. These are simple C functions with a `printf()` API, so do not understand C++ objects like `std::string`. Therefore you need to call `obj.c_str()` when passing character data to them. ### Logical vector construction If you are constructing a length 1 logical vector you may need to explicitly use a `r_bool()` object in the initializer list rather than `TRUE`, `FALSE` or `NA_INTEGER`. This issue only occurs with the clang compiler, not gcc. When constructing vectors with more than one element this is not an issue ```cpp // bad cpp11::writable::logicals({FALSE}); // good cpp11::writable::logicals({r_bool(FALSE)}); // good cpp11::writable::logicals({FALSE, NA_LOGICAL}); ``` cpp11/vignettes/internals.Rmd0000644000175000017500000002061214140020620015771 0ustar nileshnilesh--- title: "cpp11 internals" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{cpp11 internals} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ``` The development repository for cpp11 is . ## Initial setup and dev workflow First install any dependencies needed for development. ```r install.packages("remotes") remotes::install_deps(dependencies = TRUE) ``` You can load the package in an interactive R session ```r devtools::load_all() ``` Or run the tests with ```r devtools::test() ``` `test()` will also re-compile the package if needed, so you do not always have to run `load_all()`. If you change the cpp11 headers you will need to clean and recompile the cpp11test package ```r devtools::clean_dll() devtools::load_all() ``` Generally when developing the C++ headers I run R with its working directory in the `cpp11test` directory and use `devtools::test()` to run the cpp11tests. 
To calculate code coverage of the cpp11 package run the following from the `cpp11` root directory. ```r covr::report(cpp11_coverage()) ``` ## Code formatting This project uses [clang-format](https://clang.llvm.org/docs/ClangFormat.html) (version 10) to automatically format the c++ code. You can run `make format` to re-format all code in the project. If your system does not have `clang-format` version 10, this can be installed using a [homebrew tap](https://github.com/r-lib/homebrew-taps) at the command line with `brew install r-lib/taps/clang-format@10`. You may need to link the newly installed version 10. To do so, run `brew unlink clang-format` followed by `brew link clang-format@10`. Alternatively many IDEs support automatically running `clang-format` every time files are written. ## Code organization cpp11 is a header only library, so all source code exposed to users lives in [inst/include](https://github.com/r-lib/cpp11/tree/main/inst/include). R code used to register functions and for `cpp11::cpp_source()` is in [R/](https://github.com/r-lib/cpp11/tree/main/R). Tests for _only_ the code in `R/` is in [tests/testthat/](https://github.com/r-lib/cpp11/tree/main/tests/testthat) The rest of the code is in a separate [cpp11test/](https://github.com/r-lib/cpp11/tree/main/cpp11test) package included in the source tree. Inside [cpp11test/src](https://github.com/r-lib/cpp11/tree/main/cpp11test/src) the files that start with `test-` are C++ tests using the [Catch](https://testthat.r-lib.org/reference/use_catch.html) support in testthat. In addition there are some regular R tests in [cpp11test/tests/testthat/](https://github.com/r-lib/cpp11/tree/main/cpp11test/tests/testthat). ## Naming conventions - All header files are named with a `.hpp` extension. - All source files are named with a `.cpp` extension. - Public header files should be put in `inst/include/cpp11` - Read only r_vector classes and free functions should be put in the `cpp11` namespace. 
- Writable r_vector class should be put in the `cpp11::writable` namespace. - Private classes and functions should be put in the `cpp11::internal` namespace. ## Vector classes All of the basic r_vector classes are class templates, the base template is defined in [cpp11/r_vector.hpp](https://github.com/r-lib/cpp11/blob/main/inst/include/cpp11/r_vector.hpp) The template parameter is the type of **value** the particular R vector stores, e.g. `double` for `cpp11::doubles`. This differs from Rcpp, whose first template parameter is the R vector type, e.g. `REALSXP`. The file first has the class declarations, then function definitions further down in the file. Specializations for the various types are in separate files, e.g. [cpp11/doubles.hpp](https://github.com/r-lib/cpp11/blob/main/inst/include/cpp11/doubles.hpp), [cpp11/integers.hpp](https://github.com/r-lib/cpp11/blob/main/inst/include/cpp11/integers.hpp) ## Coercion functions There are two different coercion functions `as_sexp()` takes a C++ object and coerces it to a SEXP object, so it can be used in R. `as_cpp<>()` is a template function that takes a SEXP and creates a C++ object from it The various methods for both functions are defined in [cpp11/as.hpp](https://github.com/r-lib/cpp11/blob/main/inst/include/cpp11/as.hpp) This is definitely the most complex part of the cpp11 code, with extensive use of [template metaprogramming](https://en.wikipedia.org/wiki/Template_metaprogramming). In particular the [substitution failure is not an error (SFINAE)](https://en.wikipedia.org/wiki/Substitution_failure_is_not_an_error) technique is used to control overloading of the functions. If we could use C++20 a lot of this code would be made simpler with [Concepts](https://en.cppreference.com/w/cpp/language/constraints), but alas. The most common C++ types are included in the test suite and should work without issues, as more exotic types are used in real projects additional issues may arise. 
Some useful links on SFINAE - https://www.fluentcpp.com/2018/05/15/make-sfinae-pretty-1-what-value-sfinae-brings-to-code/, https://www.fluentcpp.com/2018/05/18/make-sfinae-pretty-2-hidden-beauty-sfinae/ ## Protection ### Protect list cpp11 uses an idea proposed by [Luke Tierney](https://github.com/RcppCore/Rcpp/issues/1081#issuecomment-630330838) to use a doubly linked list with the head preserved to protect objects cpp11 is protecting. Each node in the list uses the head (`CAR`) part to point to the previous node, and the `CDR` part to point to the next node. The `TAG` is used to point to the object being protected. The head and tail of the list have `R_NilValue` as their `CAR` and `CDR` pointers respectively. Calling `preserved.insert()` with a regular R object will add a new node to the list and return a protect token corresponding to the node added. Calling `preserved.release()` on this returned token will release the protection by unlinking the node from the linked list. This scheme scales in O(1) time to release or insert an object vs O(N) or worse time with `R_PreserveObject()` / `R_ReleaseObject()`. These functions are defined in [protect.hpp](https://github.com/r-lib/cpp11/blob/main/inst/include/cpp11/protect.hpp) ### Unwind Protect In R 3.5+ cpp11 uses `R_UnwindProtect` to protect (most) calls to the R API that could fail. These are usually those that allocate memory, though in truth most R API functions could error along some paths. If an error happens under `R_UnwindProtect` cpp11 will throw a C++ exception. This exception is caught by the try catch block defined in the `BEGIN_CPP11` macro in [cpp11/declarations.hpp](https://github.com/r-lib/cpp11/blob/main/inst/include/cpp11/declarations.hpp). The exception will cause any C++ destructors to run, freeing any resources held by C++ objects. After the try catch block exits the R error unwinding is then continued by `R_ContinueUnwind()` and a normal R error results.
In R versions prior to 3.5 `R_UnwindProtect()` is not available. Unfortunately the options to emulate it are not ideal. 1. Using `R_TopLevelExec()` works to avoid the C long jump, but because the code is always run in a top level context any errors or messages thrown cannot be caught by `tryCatch()` or similar techniques. 2. Using `R_TryCatch()` is not available prior to R 3.4, and also has a serious bug in R 3.4 (fixed in R 3.5). 3. Calling the R level `tryCatch()` function which contains an expression that runs a C function which then runs the C++ code would be an option, but implementing this is convoluted and it would impact performance, perhaps severely. 4. Have `cpp11::unwind_protect()` be a no-op for these versions. This means any resources held by C++ objects would leak, including cpp11::r_vector / cpp11::sexp objects. None of these options is perfect, here are some pros and cons for each. 1. Causes behavior changes and test failures, so it was ruled out. 2. Was also ruled out since we want to support back to R 3.3. 3. Was ruled out partially because the implementation would be somewhat tricky and more because performance would suffer greatly. 4. is what we now do in cpp11. It leaks protected objects when there are R API errors. If packages are concerned about the leaked memory they can call `cpp11::preserved.release_all()` as needed to release the current protections for all objects managed by cpp11. This is not done automatically because in some cases the protections should persist beyond the `.Call()` boundary, e.g. in vroom altrep objects.
cpp11/vignettes/sum.Rds0000644000175000017500000000114614071114152014614 0ustar nileshnileshb```b`@YHp94krA!M8,E@ElTA@6&LMLJLN.-I,I)B}sQ_SRZ0@Xe}tĴ{3|~9{g|fu_q8B!OFoIj$k;qt (#<9$C Iy%f+M-L2]itAoKy ^>@gw?H~ze+nҙcKT2J?@>\pbpbd9BX88>VGrCTYZՕ `W:\bgǡK;Cn{wE_KXULr,/$;~-aP\X(~׎3_?hCԢu%zwtP!&TXb,Z㕥& ʊ1''?%Eũ0.K^|z2C8`SgxdAMlE0G$ħ\$$MiUI#cpp11/vignettes/release.Rds0000644000175000017500000000044714071114152015433 0ustar nileshnileshb```b`@YD1s9D2C>diGM:  H&Ra)Hf30=gw{mm@:,;v~H rQӫW?cp8 cpp11 FAQ

cpp11 FAQ

Intended to answer common questions about the usage of cpp11 objects

FAQ

1. How do I add elements to a named list?

  • Use push_back() with named literals to add elements to a named list
#include <cpp11.hpp>

using namespace cpp11;


// Builds a named list with a single element: starts from an empty writable
// list and appends the value 1 under the name "foo" using a named literal.
[[cpp11::register]]
cpp11::list fn() {
  cpp11::writable::list result;
  result.push_back({"foo"_nm = 1});  // the name is attached to the pushed value
  return result;
}

2. How do I create a new empty environment?

  • Use the C++ auto keyword with R’s new.env to return a new empty environment. This should then be passed to cpp11::environment before use
#include <cpp11.hpp>

using namespace cpp11;

// Creates a new environment by looking up `new.env` in the base package,
// calling it, and wrapping the result in a cpp11::environment.
[[cpp11::register]]
cpp11::environment get_environment() {
  // `auto` holds whatever the package lookup returns for "new.env".
  auto make_env = cpp11::package("base")["new.env"];
  return cpp11::environment(make_env());
}

3. How do I assign and retrieve values in an environment? What happens if I try to get a value that doesn’t exist?

  • To assign and retrieve values in an environment use brackets to access values in your environment by name.
  • To check if a named value exists use my_env["bar"] == R_UnboundValue which returns TRUE if the value is unassigned and NULL.
#include <cpp11.hpp>

using namespace cpp11;
// Assigns 5 to "foo" in a newly created environment, reads the value back as
// an int, and reports whether it equals 5.
[[cpp11::register]]
cpp11::r_bool foo_exists() {
  auto make_env = cpp11::package("base")["new.env"];
  cpp11::environment scratch(make_env());

  // Bracket access by name assigns into the environment.
  scratch["foo"] = 5;

  // Bracket access by name also retrieves the value again.
  cpp11::r_bool matches = (as_cpp<int>(scratch["foo"]) == 5);
  return matches;
}
#include <cpp11.hpp>

using namespace cpp11;

// Reports whether "bar" has a value in a newly created environment. Nothing is
// assigned here, so the lookup is compared against R_UnboundValue and the
// result of that comparison is negated.
[[cpp11::register]]
cpp11::r_bool bar_exists() {
  auto make_env = cpp11::package("base")["new.env"];
  cpp11::environment scratch(make_env());

  // True when the name currently has no value in the environment.
  cpp11::r_bool is_unbound = (scratch["bar"] == R_UnboundValue);

  return !is_unbound;
}

4. How can I create a cpp11::raws from a std::string?

  • Push back the contents individually
#include <cpp11.hpp>

using namespace cpp11;
// Copies the characters of a std::string into a raw vector, appending them
// one at a time with push_back().
[[cpp11::register]]
cpp11::raws push() {
  std::string text("hi");
  cpp11::writable::raws bytes;

  for (std::string::const_iterator it = text.begin(); it != text.end(); ++it) {
    bytes.push_back(*it);
  }

  return bytes;
}

5. What are the underlying types of cpp11 objects?

vector element
integers int
doubles double
logicals r_bool
strings r_string
raws uint8_t
list SEXP

6. How do I create a new empty list?

cpp11::writable::list x;

7. How do I retrieve (named) elements from a named vector/list?

x["foo"]

8. How can I tell whether a vector is named?

TODO

9. What are the types for C++ iterators?

  • Iterators are internal classes. An example of an iterator for doubles would be cpp11::doubles::iterator

10. How do I return cpp11::writable::logicals() objects?

  • Use brace initialization:
#include <cpp11.hpp>

using namespace cpp11;
// Returns a logical vector using brace initialization, which is the form this
// FAQ entry recommends.
[[cpp11::register]]
cpp11::writable::logicals fn2() {
  return {false};
}
  • Otherwise this will return a 0 length logical vector:
#include <cpp11.hpp>

using namespace cpp11;
// Counter-example: returns a bare `false` with no braces. The FAQ text notes
// that this form yields a 0 length logical vector.
[[cpp11::register]]
cpp11::writable::logicals fn() {
  return false;  // deliberately not brace-initialized
}
  • This happens because return false; selects the R_xlen_t size constructor (with false converted to a size of 0), while the brace-initialized return {false}; selects the std::initializer_list<>() constructor.

11. Does cpp11 support default arguments?

  • cpp11 does not support default arguments. Instead cpp11 registered functions can be wrapped in an R function for default argument support
#include <cpp11.hpp>
// Maps the flag to a label: "length" when `length` is true, "width" otherwise.
[[cpp11::register]]
std::string fn(bool length) {
  return length ? "length" : "width";
}
# R-level wrapper that gives `length` a default of FALSE and then delegates to
# the registered function `fn()`.
full_fn <- function(length = FALSE) fn(length)
full_fn(TRUE)
#> [1] "length"
#> "length"
full_fn()
#> [1] "width"
#> "width"

12. Why do I have to include using namespace in my code as well as std:: inside of [[cpp11::register]] functions?

  • Since namespace definitions will not be in the registration file generated by cpp11, types used in function definitions that are decorated with [[cpp11::register]] need to be fully qualified. However type names within those functions will work as expected.

The following won’t compile

#include <cpp11.hpp>
#include <string>

using namespace std;

// Deliberately broken example: the return type in the signature is the
// unqualified `string`. The surrounding text explains that types in a
// registered function's signature must be fully qualified, so this version
// does not compile.
[[cpp11::register]]
string foobar() {
  return string("foo") + "-bar";
}

But this will compile and work as expected

#include <cpp11.hpp>
#include <string>

using namespace std;
[[cpp11::register]]
std::string foobar() {
  return string("foo") + "-bar";
}

13. How do I modify a vector in place?

  • Using writable:: will always make a copy, but it has a move constructor, so you can use cpp11::writable::integers(std::move(x)) and it won’t make a copy of the data.
cpp11/vignettes/cpp11.Rmd0000644000175000017500000012216114120442712014730 0ustar nileshnilesh--- title: "Get started with cpp11" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Get started with cpp11} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>", eval = as.logical(Sys.getenv("CPP11_EVAL", "false")) ) ``` *This content is adapted (with permission) from the [Rcpp chapter](https://adv-r.hadley.nz/rcpp.html) of Hadley Wickham's book Advanced R.* ## Introduction Sometimes R code just isn't fast enough. You've used profiling to figure out where your bottlenecks are, and you've done everything you can in R, but your code still isn't fast enough. In this vignette you'll learn how to improve performance by rewriting key functions in C++. This magic comes by way of the [cpp11](https://github.com/r-lib/cpp11) package. cpp11 makes it very simple to connect C++ to R. While it is _possible_ to write C or Fortran code for use in R, it will be painful by comparison. cpp11 provides a clean, approachable API that lets you write high-performance code, insulated from R's more complex C API. Typical bottlenecks that C++ can address include: * Loops that can't be easily vectorised because subsequent iterations depend on previous ones. * Recursive functions, or problems which involve calling functions millions of times. The overhead of calling a function in C++ is much lower than in R. * Problems that require advanced data structures and algorithms that R doesn't provide. Through the standard template library (STL), C++ has efficient implementations of many important data structures, from ordered maps to double-ended queues. The aim of this vignette is to discuss only those aspects of C++ and cpp11 that are absolutely necessary to help you eliminate bottlenecks in your code. 
We won't spend much time on advanced features like object-oriented programming or templates because the focus is on writing small, self-contained functions, not big programs. A working knowledge of C++ is helpful, but not essential. Many good tutorials and references are freely available, including <https://www.learncpp.com/> and <https://en.cppreference.com/w/cpp>. For more advanced topics, the _Effective C++_ series by Scott Meyers is a popular choice. ### Outline * Section [intro](#intro) teaches you how to write C++ by converting simple R functions to their C++ equivalents. You'll learn how C++ differs from R, and what the key scalar, vector, and matrix classes are called. * Section [cpp_source](#cpp-source) shows you how to use `cpp11::cpp_source()` to load a C++ file from disk in the same way you use `source()` to load a file of R code. * Section [classes](#classes) discusses how to modify attributes from cpp11, and mentions some of the other important classes. * Section [na](#na) teaches you how to work with R's missing values in C++. * Section [stl](#stl) shows you how to use some of the most important data structures and algorithms from the standard template library, or STL, built-in to C++. * Section [case-studies](#case-studies) shows two real case studies where cpp11 was used to get considerable performance improvements. * Section [package](#package) teaches you how to add C++ code to an R package. * Section [more](#more) concludes the vignette with pointers to more resources to help you learn cpp11 and C++. ### Prerequisites We'll use [cpp11](https://github.com/r-lib/cpp11) to call C++ from R: ```{r setup} library(cpp11) ``` You'll also need a working C++ compiler. To get it: * On Windows, install [Rtools](https://CRAN.R-project.org/bin/windows/Rtools/). * On Mac, install Xcode from the app store. * On Linux, `sudo apt-get install r-base-dev` or similar.
## Getting started with C++ {#intro} `cpp_function()` allows you to write C++ functions in R: ```{r add} cpp_function('int add(int x, int y, int z) { int sum = x + y + z; return sum; }') # add works like a regular R function add add(1, 2, 3) ``` When you run the above code, cpp11 will compile the C++ code and construct an R function that connects to the compiled C++ function. There's a lot going on underneath the hood but cpp11 takes care of all the details so you don't need to worry about them. The following sections will teach you the basics by translating simple R functions to their C++ equivalents. We'll start simple with a function that has no inputs and a scalar output, and then make it progressively more complicated: * Scalar input and scalar output * Vector input and scalar output * Vector input and vector output * Matrix input and vector output ### No inputs, scalar output Let's start with a very simple function. It has no arguments and always returns the integer 1: ```{r one-r} one <- function() 1L ``` The equivalent C++ function is: ```cpp int one() { return 1; } ``` We can compile and use this from R with `cpp_function()` ```{r one-cpp} cpp_function('int one() { return 1; }') ``` This small function illustrates a number of important differences between R and C++: * The syntax to create a function looks like the syntax to call a function; you don't use assignment to create functions as you do in R. * You must declare the type of output the function returns. This function returns an `int` (a scalar integer). The classes for the most common types of R vectors are: `doubles`, `integers`, `strings`, and `logicals`. * Scalars and vectors are different. The scalar equivalents of numeric, integer, character, and logical vectors are: `double`, `int`, `String`, and `bool`. * You must use an explicit `return` statement to return a value from a function. * Every statement is terminated by a `;`. 
### Scalar input, scalar output The next example function implements a scalar version of the `sign()` function which returns 1 if the input is positive, and -1 if it's negative: ```{r sign} sign_r <- function(x) { if (x > 0) { 1 } else if (x == 0) { 0 } else { -1 } } cpp_function('int sign_cpp(int x) { if (x > 0) { return 1; } else if (x == 0) { return 0; } else { return -1; } }') ``` In the C++ version: * We declare the type of each input in the same way we declare the type of the output. While this makes the code a little more verbose, it also makes clear the type of input the function needs. * The `if` syntax is identical --- while there are some big differences between R and C++, there are also lots of similarities! C++ also has a `while` statement that works the same way as R's. As in R you can use `break` to exit the loop, but to skip one iteration you need to use `continue` instead of `next`. ### Vector input, scalar output One big difference between R and C++ is that the cost of loops is much lower in C++. For example, we could implement the `sum` function in R using a loop. If you've been programming in R a while, you'll probably have a visceral reaction to this function! ```{r sum-r} sum_r <- function(x) { total <- 0 for (i in seq_along(x)) { total <- total + x[i] } total } ``` In C++, loops have very little overhead, so it's fine to use them. In Section [stl](#stl), you'll see alternatives to `for` loops that more clearly express your intent; they're not faster, but they can make your code easier to understand. ```{r sum-cpp} cpp_function('double sum_cpp(doubles x) { int n = x.size(); double total = 0; for(int i = 0; i < n; ++i) { total += x[i]; } return total; }') ``` The C++ version is similar, but: * To find the length of the vector, we use the `.size()` method, which returns an integer. C++ methods are called with `.` (i.e., a full stop). * The `for` statement has a different syntax: `for(init; check; increment)`. 
This loop is initialised by creating a new variable called `i` with value 0. Before each iteration we check that `i < n`, and terminate the loop if it's not. After each iteration, we increment the value of `i` by one, using the special prefix operator `++` which increases the value of `i` by 1. * In C++, vector indices start at 0, which means that the last element is at position `n - 1`. I'll say this again because it's so important: __IN C++, VECTOR INDICES START AT 0__! This is a very common source of bugs when converting R functions to C++. * Use `=` for assignment, not `<-`. * C++ provides operators that modify in-place: `total += x[i]` is equivalent to `total = total + x[i]`. Similar in-place operators are `-=`, `*=`, and `/=`. This is a good example of where C++ is much more efficient than R. As shown by the following microbenchmark, `sum_cpp()` is competitive with the built-in (and highly optimised) `sum()`, while `sum_r()` is several orders of magnitude slower. ```{r sum-bench} x <- runif(1e3) bench::mark( sum(x), sum_cpp(x), sum_r(x) )[1:6] ``` ### Vector input, vector output Next we'll create a function that computes the Euclidean distance between a value and a vector of values: ```{r pdist-r} pdist_r <- function(x, ys) { sqrt((x - ys) ^ 2) } ``` In R, it's not obvious that we want `x` to be a scalar from the function definition, and we'd need to make that clear in the documentation. That's not a problem in the C++ version because we have to be explicit about types: ```{r pdist-cpp} cpp_function('doubles pdist_cpp(double x, doubles ys) { int n = ys.size(); writable::doubles out(n); for(int i = 0; i < n; ++i) { out[i] = sqrt(pow(ys[i] - x, 2.0)); } return out; }') ``` This function introduces a few new concepts: * Because we are creating a new vector we need to use `writable::doubles` rather than the read-only `doubles`. * We create a new numeric vector of length `n` with a constructor: `cpp11::writable::doubles out(n)`.
Another useful way of making a vector is to copy an existing one: `cpp11::doubles zs(ys)`. * C++ uses `pow()`, not `^`, for exponentiation. Note that because the R version is fully vectorised, it's already going to be fast. ```{r} y <- runif(1e6) bench::mark( pdist_r(0.5, y), pdist_cpp(0.5, y) )[1:6] ``` On my computer, it takes around 5 ms with a 1 million element `y` vector. The C++ function is about 2.5 times faster, ~2 ms, but assuming it took you 10 minutes to write the C++ function, you'd need to run it ~200,000 times to make rewriting worthwhile. The reason why the C++ function is faster is subtle, and relates to memory management. The R version needs to create an intermediate vector the same length as y (`x - ys`), and allocating memory is an expensive operation. The C++ function avoids this overhead because it uses an intermediate scalar. ```{r, include = FALSE} # 5e-3 * x == 2e-3 * x + 10 * 60 600 / (5e-3 - 2e-3) ``` ### Using cpp_source {#cpp-source} So far, we've used inline C++ with `cpp_function()`. This makes presentation simpler, but for real problems, it's usually easier to use stand-alone C++ files and then source them into R using `cpp_source()`. This lets you take advantage of text editor support for C++ files (e.g., syntax highlighting) as well as making it easier to identify the line numbers in compilation errors. Your stand-alone C++ file should have extension `.cpp`, and needs to start with: ```cpp #include "cpp11.hpp" using namespace cpp11; ``` And for each function that you want available within R, you need to prefix it with: ```cpp [[cpp11::register]] ``` If you're familiar with roxygen2, you might wonder how this relates to `@export`. `cpp11::register` registers a C++ function to be called from R. `@export` controls whether a function is exported from a package and made available to the user. To compile the C++ code, use `cpp_source("path/to/file.cpp")`. This will create the matching R functions and add them to your current session. 
Note that these functions can not be saved in a `.Rdata` file and reloaded in a later session; they must be recreated each time you restart R. This example also illustrates a different kind of a `for` loop, a for-each loop. ```{cpp11} #include "cpp11/doubles.hpp" using namespace cpp11; [[cpp11::register]] double mean_cpp(doubles x) { int n = x.size(); double total = 0; for(double value : x) { total += value; } return total / n; } ``` NB: if you run this code, you'll notice that `mean_cpp()` is faster than the built-in `mean()`. This is because it trades numerical accuracy for speed. For the remainder of this vignette C++ code will be presented stand-alone rather than wrapped in a call to `cpp_function`. If you want to try compiling and/or modifying the examples you should paste them into a C++ source file that includes the elements described above. This is easy to do in RMarkdown by using `{cpp11}` instead of `{r}` at the beginning of your code blocks. ### Exercises 1. With the basics of C++ in hand, it's now a great time to practice by reading and writing some simple C++ functions. For each of the following functions, read the code and figure out what the corresponding base R function is. You might not understand every part of the code yet, but you should be able to figure out the basics of what the function does. 
```{cpp11} #include "cpp11.hpp" using namespace cpp11; namespace writable = cpp11::writable; [[cpp11::register]] double f1(doubles x) { int n = x.size(); double y = 0; for(int i = 0; i < n; ++i) { y += x[i] / n; } return y; } [[cpp11::register]] doubles f2(doubles x) { int n = x.size(); writable::doubles out(n); out[0] = x[0]; for(int i = 1; i < n; ++i) { out[i] = out[i - 1] + x[i]; } return out; } [[cpp11::register]] bool f3(logicals x) { int n = x.size(); for(int i = 0; i < n; ++i) { if (x[i]) { return true; } } return false; } [[cpp11::register]] int f4(cpp11::function pred, list x) { int n = x.size(); for(int i = 0; i < n; ++i) { logicals res(pred(x[i])); if (res[0]) { return i + 1; } } return 0; } ``` 1. To practice your function writing skills, convert the following functions into C++. For now, assume the inputs have no missing values. 1. `all()`. 2. `cumprod()`, `cummin()`, `cummax()`. 3. `diff()`. Start by assuming lag 1, and then generalise for lag `n`. 4. `range()`. 5. `var()`. Read about the approaches you can take on [Wikipedia](https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance). Whenever implementing a numerical algorithm, it's always good to check what is already known about the problem. ## Other classes {#classes} You've already seen the basic vector classes (`integers`, `doubles`, `logicals`, `strings`) and their scalar (`int`, `double`, `bool`, `string`) equivalents. cpp11 also provides wrappers for other base data types. The most important are for lists and data frames, functions, and attributes, as described below. ### Lists and data frames cpp11 also provides `list` and `data_frame` classes, but they are more useful for output than input. This is because lists and data frames can contain arbitrary classes but C++ needs to know their classes in advance. If the list has known structure (e.g., it's an S3 object), you can extract the components and manually convert them to their C++ equivalents with `as_cpp()`. 
For example, the object created by `lm()`, the function that fits a linear model, is a list whose components are always of the same type. The following code illustrates how you might extract the mean percentage error (`mpe()`) of a linear model. This isn't a good example of when to use C++, because it's so easily implemented in R, but it shows how to work with an important S3 class. Note the use of `Rf_inherits()` and the `stop()` to check that the object really is a linear model. ```{cpp11} #include "cpp11.hpp" using namespace cpp11; [[cpp11::register]] double mpe(list mod) { if (!Rf_inherits(mod, "lm")) { stop("Input must be a linear model"); } doubles resid(mod["residuals"]); doubles fitted(mod["fitted.values"]); int n = resid.size(); double err = 0; for(int i = 0; i < n; ++i) { err += resid[i] / (fitted[i] + resid[i]); } return err / n; } ``` ```{r} mod <- lm(mpg ~ wt, data = mtcars) mpe(mod) ``` ### Functions {#functions-cpp11} You can put R functions in an object of type `function`. This makes calling an R function from C++ straightforward. The only challenge is that we don't know what type of output the function will return, so we use the catchall type `sexp`. This stands for S-Expression and is used as the type of all R Objects in the internal C code. ```{cpp11} #include "cpp11.hpp" using namespace cpp11; namespace writable = cpp11::writable; [[cpp11::register]] sexp call_with_one(function f) { return f(1); } ``` ```{r} call_with_one(function(x) x + 1) call_with_one(paste) ``` Calling R functions with positional arguments is obvious: ```cpp f("y", 1); ``` But you need a special syntax for named arguments: ```cpp using namespace cpp11::literals; f("x"_nm = "y", "value"_nm = 1); ``` ### Attributes All R objects have attributes, which can be queried and modified with `.attr()`. cpp11 also provides `.names()` as an alias for the `names` attribute. The following code snippet illustrates these methods. 
Note the use of `{}` [initializer list](https://en.cppreference.com/w/cpp/utility/initializer_list) syntax. This allows you to create an R vector from C++ scalar values: ```{r attribs, engine = "cpp11"} #include "cpp11.hpp" using namespace cpp11; namespace writable = cpp11::writable; [[cpp11::register]] doubles attribs() { writable::doubles out = {1., 2., 3.}; out.names() = {"a", "b", "c"}; out.attr("my-attr") = "my-value"; out.attr("class") = "my-class"; return out; } ``` ## Missing values {#na} If you're working with missing values, you need to know two things: * How R's missing values behave in C++'s scalars (e.g., `double`). * How to get and set missing values in vectors (e.g., `doubles`). ### Scalars The following code explores what happens when you take one of R's missing values, coerce it into a scalar, and then coerce back to an R vector. Note that this kind of experimentation is a useful way to figure out what any operation does. ```{r missings, engine = "cpp11"} #include "cpp11.hpp" using namespace cpp11; [[cpp11::register]] list scalar_missings() { int int_s = NA_INTEGER; r_string chr_s = NA_STRING; bool lgl_s = NA_LOGICAL; double num_s = NA_REAL; return writable::list({as_sexp(int_s), as_sexp(chr_s), as_sexp(lgl_s), as_sexp(num_s)}); } ``` ```{r} str(scalar_missings()) ``` With the exception of `bool`, things look pretty good here: all of the missing values have been preserved. However, as we'll see in the following sections, things are not quite as straightforward as they seem. #### Integers With integers, missing values are stored as the smallest integer. If you don't do anything to them, they'll be preserved. But, since C++ doesn't know that the smallest integer has this special behaviour, if you do anything to it you're likely to get an incorrect value: for example, `cpp_eval('NA_INTEGER + 1')` gives -2147483647. So if you want to work with missing values in integers, either use a length 1 `integers` or be very careful with your code. 
#### Doubles With doubles, you may be able to get away with ignoring missing values and working with NaNs (not a number). This is because R's NA is a special type of IEEE 754 floating point number NaN. So any logical expression that involves a NaN (or in C++, NAN) always evaluates as FALSE: ```{r} cpp_eval("NAN == 1") cpp_eval("NAN < 1") cpp_eval("NAN > 1") cpp_eval("NAN == NAN") ``` (Here I'm using `cpp_eval()` which allows you to see the result of running a single C++ expression, making it excellent for this sort of interactive experimentation.) But be careful when combining them with Boolean values: ```{r} cpp_eval("NAN && TRUE") cpp_eval("NAN || FALSE") ``` However, in numeric contexts NaNs will propagate NAs: ```{r} cpp_eval("NAN + 1") cpp_eval("NAN - 1") cpp_eval("NAN / 1") cpp_eval("NAN * 1") ``` ### Strings `r_string` is a scalar string class introduced by cpp11, so it knows how to deal with missing values. ### Boolean C++'s `bool` has two possible values (`true` or `false`), a logical vector in R has three (`TRUE`, `FALSE`, and `NA`). If you coerce a length 1 logical vector, make sure it doesn't contain any missing values; otherwise they will be converted to TRUE. One way to fix this is to use `int` instead, as this can represent `TRUE`, `FALSE`, and `NA`. ### Vectors {#vectors-cpp11} With vectors, you need to use a missing value specific to the type of vector, `NA_REAL`, `NA_INTEGER`, `NA_LOGICAL`, `NA_STRING`: ```{r, engine = "cpp11"} #include "cpp11.hpp" using namespace cpp11; namespace writable = cpp11::writable; [[cpp11::register]] list missing_sampler() { return writable::list({ writable::doubles({NA_REAL}), writable::integers({NA_INTEGER}), writable::logicals({r_bool(NA_LOGICAL)}), writable::strings({NA_STRING}) }); } ``` ```{r} str(missing_sampler()) ``` ### Exercises 1. Rewrite any of the functions from the first exercise to deal with missing values. If `na_rm` is true, ignore the missing values.
If `na_rm` is false, return a missing value if the input contains any missing values. Some good functions to practice with are `min()`, `max()`, `range()`, `mean()`, and `var()`. 1. Rewrite `cumsum()` and `diff()` so they can handle missing values. Note that these functions have slightly more complicated behaviour. ## Standard Template Library {#stl} The real strength of C++ is revealed when you need to implement more complex algorithms. The standard template library (STL) provides a set of extremely useful data structures and algorithms. This section will explain some of the most important algorithms and data structures and point you in the right direction to learn more. I can't teach you everything you need to know about the STL, but hopefully the examples will show you the power of the STL, and persuade you that it's useful to learn more. If you need an algorithm or data structure that isn't implemented in STL, one place to look is [boost](https://www.boost.org/doc/). Installing boost on your computer is beyond the scope of this vignette, but once you have it installed, you can use boost data structures and algorithms by including the appropriate header file with (e.g.) `#include <boost/array.hpp>`. ### Using iterators Iterators are used extensively in the STL: many functions either accept or return iterators. They are the next step up from basic loops, abstracting away the details of the underlying data structure. Iterators have three main operators: 1. Advance with `++`. 1. Get the value they refer to, or __dereference__, with `*`. 1. Compare with `==`. For example we could re-write our sum function using iterators: ```{r, engine = "cpp11"} #include "cpp11.hpp" using namespace cpp11; [[cpp11::register]] double sum2(doubles x) { double total = 0; for(auto it = x.begin(); it != x.end(); ++it) { total += *it; } return total; } ``` The main changes are in the for loop: * We start at `x.begin()` and loop until we get to `x.end()`.
A small optimization is to store the value of the end iterator so we don't need to look it up each time. This only saves about 2 ns per iteration, so it's only important when the calculations in the loop are very simple. * Instead of indexing into x, we use the dereference operator to get its current value: `*it`. * Notice we use `auto` rather than giving the type of the iterator. This code can be simplified still further through the use of a C++11 feature: range-based for loops. ```{r, engine = "cpp11"} #include "cpp11.hpp" using namespace cpp11; [[cpp11::register]] double sum3(doubles xs) { double total = 0; for(auto x : xs) { total += x; } return total; } ``` Iterators also allow us to use the C++ equivalents of the apply family of functions. For example, we could again rewrite `sum()` to use the `accumulate()` function, which takes a starting and an ending iterator, and adds up all the values in the vector. The third argument to `accumulate` gives the initial value: it's particularly important because this also determines the data type that `accumulate` uses (so we use `0.0` and not `0` so that `accumulate` uses a `double`, not an `int`.). To use `accumulate()` we need to include the `<numeric>` header. ```{r, engine = "cpp11"} #include <numeric> #include "cpp11.hpp" using namespace cpp11; [[cpp11::register]] double sum4(doubles x) { return std::accumulate(x.begin(), x.end(), 0.0); } ``` ```{r, include = FALSE, error = FALSE} # Verify that our sum implementations work local({ x <- c(.5, .1, .3, .7, 12.) stopifnot(identical(sum(x), sum2(x))) stopifnot(identical(sum(x), sum3(x))) stopifnot(identical(sum(x), sum4(x))) }) ``` ### Algorithms The `<algorithm>` header provides a large number of algorithms that work with iterators. A good reference is available at <https://en.cppreference.com/w/cpp/algorithm>. For example, we could write a basic cpp11 version of `findInterval()` that takes two arguments, a vector of values and a vector of breaks, and locates the bin that each x falls into. This shows off a few more advanced iterator features.
Read the code below and see if you can figure out how it works. ```{r, engine = "cpp11"} #include <algorithm> #include "cpp11.hpp" using namespace cpp11; [[cpp11::register]] integers findInterval2(doubles x, doubles breaks) { writable::integers out(x.size()); auto out_it = out.begin(); for (auto&& val : x) { auto pos = std::upper_bound(breaks.begin(), breaks.end(), val); *out_it = std::distance(breaks.begin(), pos); ++out_it; } return out; } ``` ```{r, include = FALSE, error = FALSE} # Verify that our findInterval2 implementation works local({ n <- 1e3 x <- sort(round(stats::rt(n, df = 2), 2)) tt <- c(-n, seq(-2, 2, length = n + 1), n) stopifnot(identical(findInterval(tt, x), findInterval2(tt, x))) }) ``` The key points are: * We step through two iterators (input and output) simultaneously. * We can assign into a dereferenced iterator (`out_it`) to change the values in `out`. * `upper_bound()` returns an iterator. If we wanted the value of the `upper_bound()` we could dereference it; to figure out its location, we use the `distance()` function. When in doubt, it is generally better to use algorithms from the STL than hand rolled loops. In _Effective STL_, Scott Meyers gives three reasons: efficiency, correctness, and maintainability. Algorithms from the STL are written by C++ experts to be extremely efficient, and they have been around for a long time so they are well tested. Using standard algorithms also makes the intent of your code more clear, helping to make it more readable and more maintainable. ### Data structures {#data-structures-cpp11} The STL provides a large set of data structures: `array`, `bitset`, `list`, `forward_list`, `map`, `multimap`, `multiset`, `priority_queue`, `queue`, `deque`, `set`, `stack`, `unordered_map`, `unordered_set`, `unordered_multimap`, `unordered_multiset`, and `vector`. The most important of these data structures are the `vector`, the `unordered_set`, and the `unordered_map`.
We'll focus on these three in this section, but using the others is similar: they just have different performance trade-offs. For example, the `deque` (pronounced "deck") has a very similar interface to vectors but a different underlying implementation that has different performance trade-offs. You may want to try it for your problem. A good reference for STL data structures is <https://en.cppreference.com/w/cpp/container> --- I recommend you keep it open while working with the STL. cpp11 knows how to convert from many STL data structures to their R equivalents, so you can return them from your functions without explicitly converting to R data structures. ### Vectors {#vectors-stl} An STL vector is very similar to an R vector, except that it grows efficiently. This makes STL vectors appropriate to use when you don't know in advance how big the output will be. Vectors are templated, which means that you need to specify the type of object the vector will contain when you create it: `vector<int>`, `vector<bool>`, `vector<double>`, `vector<std::string>`. You can access individual elements of a vector using the standard `[]` notation, and you can add a new element to the end of the vector using `.push_back()`. If you have some idea in advance how big the vector will be, you can use `.reserve()` to allocate sufficient storage. The following code implements run length encoding (`rle()`). It produces two vectors of output: a vector of values, and a vector `lengths` giving how many times each element is repeated. It works by looping through the input vector `x` comparing each value to the previous: if it's the same, then it increments the last value in `lengths`; if it's different, it adds the value to the end of `values`, and sets the corresponding length to 1.
```{r, engine = "cpp11"} #include "cpp11.hpp" #include <vector> using namespace cpp11; namespace writable = cpp11::writable; [[cpp11::register]] list rle_cpp(doubles x) { std::vector<int> lengths; std::vector<double> values; // Initialise first value int i = 0; double prev = x[0]; values.push_back(prev); lengths.push_back(1); for(auto it = x.begin() + 1; it != x.end(); ++it) { if (prev == *it) { lengths[i]++; } else { values.push_back(*it); lengths.push_back(1); i++; prev = *it; } } return writable::list({ "lengths"_nm = lengths, "values"_nm = values }); } ``` (An alternative implementation would be to replace `i` with the iterator `lengths.rbegin()` which always points to the last element of the vector. You might want to try implementing that.) Other methods of a vector are described at <https://en.cppreference.com/w/cpp/container/vector>. ### Sets Sets maintain a unique set of values, and can efficiently tell if you've seen a value before. They are useful for problems that involve duplicates or unique values (like `unique`, `duplicated`, or `in`). C++ provides both ordered (`std::set`) and unordered sets (`std::unordered_set`), depending on whether or not order matters for you. Unordered sets can sometimes be much faster (because they use a hash table internally rather than a tree). Often even if you need an ordered set, you could consider using an unordered set and then sorting the output. Benchmarking with your expected dataset is the best way to determine which is fastest for your data. Like vectors, sets are templated, so you need to request the appropriate type of set for your purpose: `unordered_set<int>`, `unordered_set<bool>`, etc. More details are available at <https://en.cppreference.com/w/cpp/container/set> and <https://en.cppreference.com/w/cpp/container/unordered_set>. The following function uses an unordered set to implement an equivalent to `duplicated()` for integer vectors. Note the use of `seen.insert(x[i]).second`. `insert()` returns a pair, the `.first` value is an iterator that points to element and the `.second` value is a Boolean that's true if the value was a new addition to the set.
```{r, engine = "cpp11"} #include <unordered_set> #include "cpp11.hpp" using namespace cpp11; namespace writable = cpp11::writable; [[cpp11::register]] logicals duplicated_cpp(integers x) { std::unordered_set<int> seen; int n = x.size(); writable::logicals out(n); for (int i = 0; i < n; ++i) { out[i] = !seen.insert(x[i]).second; } return out; } ``` ### Exercises To practice using the STL algorithms and data structures, implement the following using R functions in C++, using the hints provided: 1. `median.default()` using `partial_sort`. 1. `%in%` using `unordered_set` and the `find()` or `count()` methods. 1. `unique()` using an `unordered_set` (challenge: do it in one line!). 1. `min()` using `std::min()`, or `max()` using `std::max()`. 1. `which.min()` using `min_element`, or `which.max()` using `max_element`. 1. `setdiff()`, `union()`, and `intersect()` for integers using sorted ranges and `set_union`, `set_intersection` and `set_difference`. ## Case studies {#case-studies} The following case studies illustrate some real life uses of C++ to replace slow R code. ### Gibbs sampler The following case study updates an example [blogged about](http://dirk.eddelbuettel.com/blog/2011/07/14/) by Dirk Eddelbuettel, illustrating the conversion of a Gibbs sampler in R to C++. The R and C++ code shown below is very similar (it only took a few minutes to convert the R version to the C++ version), but runs about 30 times faster on my computer. Dirk's blog post also shows another way to make it even faster: using the faster random number generator functions in GSL (easily accessible from R through the RcppGSL package) can make it another two to three times faster. The R code is as follows: ```{r} gibbs_r <- function(N, thin) { mat <- matrix(nrow = N, ncol = 2) x <- y <- 0 for (i in 1:N) { for (j in 1:thin) { x <- rgamma(1, 3, y * y + 4) y <- rnorm(1, 1 / (x + 1), 1 / sqrt(2 * (x + 1))) } mat[i, ] <- c(x, y) } mat } ``` This is relatively straightforward to convert to C++.
We: * Add type declarations to all variables. * Use `(` instead of `[` to index into the matrix. * Include "Rmath.h" and call the functions with `Rf_`. ```{r, engine = "cpp11"} #include "cpp11/matrix.hpp" #include "cpp11/doubles.hpp" #include "Rmath.h" using namespace cpp11; namespace writable = cpp11::writable; [[cpp11::register]] cpp11::doubles_matrix<> gibbs_cpp(int N, int thin) { writable::doubles_matrix<> mat(N, 2); double x = 0, y = 0; for (int i = 0; i < N; i++) { for (int j = 0; j < thin; j++) { x = Rf_rgamma(3., 1. / double(y * y + 4)); y = Rf_rnorm(1. / (x + 1.), 1. / sqrt(2. * (x + 1.))); } mat(i, 0) = x; mat(i, 1) = y; } return mat; } ``` Benchmarking the two implementations yields a significant speedup for running the loops in C++: ```{r} bench::mark( r = { set.seed(42) gibbs_r(100, 10) }, cpp = { set.seed(42) gibbs_cpp(100, 10) }, check = TRUE, relative = TRUE ) ``` ### R vectorisation versus C++ vectorisation This example is adapted from ["Rcpp is smoking fast for agent-based models in data frames"](https://gweissman.github.io/post/rcpp-is-smoking-fast-for-agent-based-models-in-data-frames/). The challenge is to predict a model response from three inputs. The basic R version of the predictor looks like: ```{r} vacc1a <- function(age, female, ily) { p <- 0.25 + 0.3 * 1 / (1 - exp(0.04 * age)) + 0.1 * ily p <- p * if (female) 1.25 else 0.75 p <- max(0, p) p <- min(1, p) p } ``` We want to be able to apply this function to many inputs, so we might write a vector-input version using a for loop. ```{r} vacc1 <- function(age, female, ily) { n <- length(age) out <- numeric(n) for (i in seq_len(n)) { out[i] <- vacc1a(age[i], female[i], ily[i]) } out } ``` If you're familiar with R, you'll have a gut feeling that this will be slow, and indeed it is. There are two ways we could attack this problem. If you have a good R vocabulary, you might immediately see how to vectorise the function (using `ifelse()`, `pmin()`, and `pmax()`). 
Alternatively, we could rewrite `vacc1a()` and `vacc1()` in C++, using our knowledge that loops and function calls have much lower overhead in C++. Either approach is fairly straightforward. In R: ```{r} vacc2 <- function(age, female, ily) { p <- 0.25 + 0.3 * 1 / (1 - exp(0.04 * age)) + 0.1 * ily p <- p * ifelse(female, 1.25, 0.75) p <- pmax(0, p) p <- pmin(1, p) p } ``` (If you've worked R a lot you might recognise some potential bottlenecks in this code: `ifelse`, `pmin`, and `pmax` are known to be slow, and could be replaced with `p * 0.75 + p * 0.5 * female`, `p[p < 0] <- 0`, `p[p > 1] <- 1`. You might want to try timing those variations.) Or in C++: ```{r engine = "cpp11"} #include "cpp11.hpp" using namespace cpp11; namespace writable = cpp11::writable; [[cpp11::register]] double vacc3a(double age, bool female, bool ily){ double p = 0.25 + 0.3 * 1 / (1 - exp(0.04 * age)) + 0.1 * ily; p = p * (female ? 1.25 : 0.75); p = std::max(p, 0.0); p = std::min(p, 1.0); return p; } [[cpp11::register]] doubles vacc3(doubles age, logicals female, logicals ily) { int n = age.size(); writable::doubles out(n); for(int i = 0; i < n; ++i) { out[i] = vacc3a(age[i], female[i], ily[i]); } return out; } ``` We next generate some sample data, and check that all three versions return the same values: ```{r} n <- 1000 age <- rnorm(n, mean = 50, sd = 10) female <- sample(c(T, F), n, rep = TRUE) ily <- sample(c(T, F), n, prob = c(0.8, 0.2), rep = TRUE) stopifnot( all.equal(vacc1(age, female, ily), vacc2(age, female, ily)), all.equal(vacc1(age, female, ily), vacc3(age, female, ily)) ) ``` The original blog post forgot to do this, and introduced a bug in the C++ version: it used `0.004` instead of `0.04`. Finally, we can benchmark our three approaches: ```{r} bench::mark( vacc1 = vacc1(age, female, ily), vacc2 = vacc2(age, female, ily), vacc3 = vacc3(age, female, ily) ) ``` Not surprisingly, our original approach with loops is very slow. 
Vectorising in R gives a huge speedup, and we can eke out even more performance (about ten times) with the C++ loop. I was a little surprised that the C++ was so much faster, but it is because the R version has to create 11 vectors to store intermediate results, whereas the C++ code only needs to create 1. ## Using cpp11 in a package {#package} The same C++ code that is used with `cpp_source()` can also be bundled into a package. There are several benefits of moving code from a stand-alone C++ source file to a package: 1. Your code can be made available to users without C++ development tools. 1. Multiple source files and their dependencies are handled automatically by the R package build system. 1. Packages provide additional infrastructure for testing, documentation, and consistency. To add `cpp11` to an existing package, first put your C++ files in the `src/` directory of your package. Then add the following to your `DESCRIPTION` file: ``` LinkingTo: cpp11 SystemRequirements: C++11 ``` and add the following [roxygen](https://roxygen2.r-lib.org/) directive somewhere in your package's R files. (A common location is `R/pkgname-package.R`) ``` #' @useDynLib pkgname, .registration = TRUE ``` You'll then need to run [`devtools::document()`](https://devtools.r-lib.org/reference/document.html) to update your `NAMESPACE` file to include the `useDynLib` statement. The easiest way to set this up is to call `usethis::use_cpp11()`, which will do the above steps for you automatically. Before building the package, you'll need to run `cpp11::cpp_register()`. This function scans the C++ files for `[[cpp11::register]]` attributes and generates the binding code required to make the functions available in R. Re-run `cpp11::cpp_register()` whenever functions are added, removed, or have their signatures changed. If you are using `devtools` to develop your package this is done automatically by the pkgbuild package when your package has `LinkingTo: cpp11` in its DESCRIPTION file.
## Learning more {#more} C++ is a large, complex language that takes years to master. If you would like to dive deeper or write more complex functions, other resources I've found helpful in learning C++ are: * [_Effective C++_](https://www.aristeia.com/books.html) and [_Effective STL_](https://www.aristeia.com/books.html) * [_C++ Annotations_](http://www.icce.rug.nl/documents/cplusplus/cplusplus.html), aimed at knowledgeable users of C (or any other language using a C-like grammar, like Perl or Java) who would like to know more about, or make the transition to, C++. * [_Algorithm Libraries_](https://www.cs.helsinki.fi/u/tpkarkka/alglib/k06/), which provides a more technical, but still concise, description of important STL concepts. (Follow the links under notes.) Writing performant code may also require you to rethink your basic approach: a solid understanding of basic data structures and algorithms is very helpful here. That's beyond the scope of this vignette, but I'd suggest the [_Algorithm Design Manual_](https://www.algorist.com/), MIT's [_Introduction to Algorithms_](https://web.archive.org/web/20200604134756/https://ocw.mit.edu/courses/electrical-engineering-and-computer-science/6-046j-introduction-to-algorithms-sma-5503-fall-2005/), and _Algorithms_ by Robert Sedgewick and Kevin Wayne, which has a free [online textbook](http://algs4.cs.princeton.edu/home/) and a matching [Coursera course](https://www.coursera.org/learn/algorithms-part1).
cpp11/vignettes/motivations.Rmd0000644000175000017500000005251514140020654016364 0ustar nileshnilesh--- title: "Motivations for cpp11" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Motivations for cpp11} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>", eval = as.logical(Sys.getenv("CPP11_EVAL", "false")) ) print_cpp <- function(filename) { cat("```c++", readLines(filename), "```", sep = "\n") } library(cpp11) should_run_benchmarks <- function(x) { get("requireNamespace")("cpp11test", quietly = TRUE) && asNamespace("cpp11test")$should_run_benchmarks() } ``` # Motivations R and S have a long history of interacting with compiled languages. In fact the original version of S written in the late 1970s was mainly a wrapper around FORTRAN routines. [(History-of-S)](https://www.r-project.org/conferences/useR-2006/Slides/Chambers.pdf) Released in 2000, the [cxx](https://cran.r-project.org/package=cxx) package was an early prototype of C++ bindings to R. [Rcpp](https://cran.r-project.org/package=Rcpp) was first published to CRAN in 2008, and [Rcpp11](https://cran.r-project.org/package=Rcpp11) in 2014. Of these `Rcpp` has by far the widest adoption, with over 2000 reverse dependencies as of 2020. Rcpp has been a widely successful project, however over the years a number of issues and additional C++ features have arisen. Adding these features to Rcpp would require a great deal of work, or in some cases would be impossible without severely breaking backwards compatibility. cpp11 is a ground up rewrite of C++ bindings to R with different design trade-offs and features. Changes that motivated cpp11 include: - Enforcing [copy-on-write semantics](#copy-on-write-semantics). - Improving the [safety](#improve-safety) of using the R API from C++ code. - Supporting [ALTREP objects](#altrep-support). - Using [UTF-8 strings](#utf-8-everywhere) everywhere. 
- Applying newer [C++11 features](#c11-features). - Having a more straightforward, [simpler implementation](#simpler-implementation). - Faster [compilation time](#compilation-speed) with lower memory requirements. - Being *completely* [header only](#header-only) to avoid ABI issues. - Capable of [vendoring](#vendoring) if desired. - More robust [protection](#protection) using a much more efficient linked list data structure. - [Growing vectors](#growing-vectors) more efficiently. ## Copy-on-write semantics R uses [copy-on-write](https://adv-r.hadley.nz/names-values.html#copy-on-modify) (also called copy-on-modify) semantics. Let's say you have two variables `x` and `y` that both point to the same underlying data. ```{r} x <- c(1, 2, 3) y <- x ``` If you modify `y`, R will first copy the values of `x` to a new position, then point `y` to the new location and only after the copy modify `y`. This allows `x` to retain the original values. ```{r} y[[3]] <- 4 y x ``` C++ does not have copy-on-write built into the language, however it has related concepts, copy-by-value and copy-by-reference. Copy-by-value works similarly to R, except that R only copies when something is changed, whereas C++ _always_ copies. ```cpp int x = 42; int y = x; y = 0; // x is still == 42 ``` Copy-by-reference does the opposite, both `x` and `y` always point to the *same* underlying value. In C++ you specify a reference with `&`. ```cpp int x = 42; int &y = x; y = 0; // both x and y are now 0 ``` Copy-by-reference is a valuable technique, as it avoids the overhead of copying the data. However it can also lead to errors when internal functions change their inputs unexpectedly. Rcpp uses copy-by-reference by default (even if you pass an Rcpp vector class by value). This gives Rcpp functions completely different semantics from normal R functions. We can illustrate this by creating an Rcpp function that multiplies its input vector by 2.
```{Rcpp} #include "Rcpp.h" using namespace Rcpp; // [[Rcpp::export]] NumericVector times_two_rcpp(NumericVector x) { for (int i = 0; i < x.size(); ++i) { x[i] = x[i] * 2; } return x; } ``` If you do this with regular R functions, you will see the value of `y` is `x` * 2, but the value of `x` is unchanged. ```{r} x <- c(1, 2, 3) y <- x * 2 y x ``` However if we now call our `times_two_rcpp()` function we get the right output value, but now `x` is *also changed*. ```{r} z <- times_two_rcpp(x) z x ``` cpp11 strives to make its functions behave similarly to normal R functions, while preserving the speed of Rcpp when read only access is needed. Each of the r_vector classes in cpp11 has a normal *read only* version that uses copy-by-reference, and a *writable* version which uses copy-by-value. ```{cpp11} #include "cpp11/doubles.hpp" [[cpp11::register]] cpp11::doubles times_two_cpp11(cpp11::writable::doubles x) { for (int i = 0; i < x.size(); ++i) { x[i] = x[i] * 2; } return x; } ``` Using `cpp11::writable::doubles` first *copies* the input vector, so when we do the multiplication we do not modify the original data. ```{r} x <- c(1, 2, 3) z <- times_two_cpp11(x) z x ``` ## Improve safety Internally R is written in C, not C++. In general C and C++ work well together, a large part of C++'s success is due to its high interoperability with C code. However one area in which C and C++ are generally *not* interoperable is error handling. In C++ the most common way to handle errors is with [exceptions](https://isocpp.org/wiki/faq/exceptions). Exceptions provide a clean, safe way for objects to obtain and cleanup resources automatically even when errors occur. ### C safety The C language does not have support for exceptions, so error handling is done a variety of ways. These include error codes like [errno](https://en.cppreference.com/w/c/error/errno), conditional statements, and in the R codebase the [longjmp](http://www.cplusplus.com/reference/csetjmp/longjmp/) function. 
`longjmp`, which stands for 'long jump', is a function that allows you to transfer the control flow of a program to another location elsewhere in the program. R uses long jumps extensively in its error handling routines. If an R function is executing and an error occurs, a long jump is called which 'jumps' the control flow into the error handling code. Crucially long jumps are *incompatible* with C++ [destructors](https://isocpp.org/wiki/faq/dtors). If a long jump occurs the destructors of any active C++ objects are not run, and therefore any resources (such as memory, file handles, etc.) managed by those objects will cause a [resource leak](https://en.wikipedia.org/wiki/Resource_leak). For example, the following unsafe code would leak the memory allocated in the C++ `std::vector` `x` when the R API function `Rf_allocVector()` fails (since you can't create a vector of `-1` size). ```cpp std::vector<double> x({1., 2., 3.}); SEXP y = PROTECT(Rf_allocVector(REALSXP, -1)); ``` cpp11 provides two mechanisms to make interfacing with R's C API and C++ code safer. `cpp11::unwind_protect()` takes a functional object (a C++11 lambda function or `std::function`) and converts any C long jumps encountered to C++ exceptions. Now instead of a C long jump happening when the `Rf_allocVector()` call fails, a C++ exception occurs, which *does* trigger the `std::vector` destructor, so that memory is automatically released. ```cpp std::vector<double> x({1., 2., 3.}); SEXP y; unwind_protect([]() { y = Rf_allocVector(REALSXP, -1); }) ``` `cpp11::safe()` is a more concise way to wrap a particular R API function with `unwind_protect()`. ```cpp std::vector<double> x({1., 2., 3.}); SEXP y = PROTECT(safe[Rf_allocVector](REALSXP, -1)); ``` Again using `cpp11::safe()` converts the C long jump to a C++ exception, so the memory is automatically released.
cpp11 uses these mechanisms extensively internally when calling the R C API, which makes cpp11 much safer against resource leaks than using Rcpp or calling R's C API by hand. ### C++ safety In the inverse of C safety we also need to ensure that C++ exceptions do not reach the C call stack, as they will terminate R if that occurs. Like Rcpp, cpp11 automatically generates `try / catch` guards around registered functions to prevent this and also converts C++ exceptions into normal R errors. This is done without developer facing code changes. With both C and C++ sides of the coin covered we can safely use R's C API and C++ code together with C++ objects without leaking resources. ## Altrep support [ALTREP](https://svn.r-project.org/R/branches/ALTREP/ALTREP.html), which stands for **ALT**ernative **REP**resentations, is a feature introduced in R 3.5. ALTREP allows R internals and package authors to define alternative ways of representing data to R. One example of the use of altrep is the `:` operator. Prior to R 3.5 `:` generated a full vector for the entire sequence, e.g. `1:1000` would require 1000 individual values. As of R 3.5 this sequence is instead represented by an ALTREP vector, so *none* of the values actually exist in memory. Instead each time R accesses a particular value in the sequence that value is computed on-the-fly. This saves memory and execution time, and allows users to use sequences which would otherwise be too big to fit in memory. ```{r, R.options = list(max.print = 20)} 1:1e9 ``` Because Rcpp predates the introduction of ALTREP, it does not support the interfaces needed to access ALTREP objects. This means the objects must be converted to normal R objects as soon as they are used by Rcpp. ```{Rcpp} #include "Rcpp.h" // [[Rcpp::export]] Rcpp::IntegerVector identity_rcpp(Rcpp::IntegerVector x) { return x; } ``` ```{r} x <- identity_rcpp(1:100000) lobstr::obj_size(x) ``` Whereas cpp11 objects preserve the ALTREP object.
```{cpp11} #include "cpp11/integers.hpp" [[cpp11::register]] cpp11::integers identity_cpp11(cpp11::integers x) { return x; } ``` ```{r} y <- identity_cpp11(1:100000) lobstr::obj_size(y) ``` ### Altrep benchmarks In these benchmarks note that Rcpp allocates memory for the ALTREP vectors. This is because Rcpp implicitly converts them into normal R vectors. cpp11 retains them as ALTREP vectors, so no additional memory is needed. `foreach` and `accumulate` both use iterators that take advantage of `REAL_GET_REGION` to buffer queries. This makes them faster than naive C-style for loops with ALTREP vectors. The for2 case shows an optimization you can use if you know at compile-time that you won't be dealing with ALTREP vectors. By specifying `false` to the second argument (`is_altrep`), you can disable the ALTREP support. This causes the ALTREP conditional code to be compiled out resulting in loop unrolling (and speeds) identical to that generated by Rcpp. ```{r, message = FALSE, results = 'asis', eval = should_run_benchmarks()} library(cpp11test) cases <- expand.grid( len = 3e6, vector = c("normal", "altrep"), method = c("for", "foreach", "accumulate"), pkg = c("cpp11", "rcpp"), stringsAsFactors = FALSE ) # Add special case cases <- rbind(list(len = 3e6, vector = "normal", method = "for2", pkg = "cpp11"), cases) b_sum <- bench::press( .grid = cases, { seq_real <- function(x) as.numeric(seq_len(x)) funs <- c("normal" = rnorm, "altrep" = seq_real) x <- funs[[vector]](len) fun <- match.fun(sprintf("%ssum_dbl_%s_", ifelse(pkg == "cpp11", "", paste0(pkg, "_")), method)) bench::mark( fun(x) ) } )[c("pkg", "method", "vector", "min", "median", "mem_alloc", "itr/sec", "n_gc")] saveRDS(b_sum, "sum.Rds", version = 2) ``` ```{r} knitr::kable(readRDS("sum.Rds")) ``` [cpp11test/src/sum.cpp](https://github.com/r-lib/cpp11/blob/main/cpp11test/src/sum.cpp) contains the code ran in these benchmarks. 
## UTF-8 everywhere R has complicated support for Unicode strings and non-ASCII code pages, whose behavior often differs substantially on different operating systems, particularly Windows. Correctly dealing with this is challenging and often feels like whack-a-mole. To combat this complexity cpp11 uses the [UTF-8 everywhere](http://utf8everywhere.org/) philosophy. This means that whenever text data is converted from R data structures to C++ data structures by cpp11 the data is translated into UTF-8. Conversely any text data coming from C++ code is assumed to be UTF-8 and marked as such for R. Doing this universally avoids many locale-specific issues when dealing with Unicode text. Concretely cpp11 always uses `Rf_translateCharUTF8()` when obtaining `const char*` from `CHARSXP` objects and uses `Rf_mkCharCE(, CE_UTF8)` when creating new `CHARSXP` objects from `const char*` inputs. ## C++11 features C++11 provides a host of new features to the C++ language. cpp11 uses a number of these including - [move semantics](https://en.cppreference.com/w/cpp/language/move_constructor) - [type traits](https://en.cppreference.com/w/cpp/header/type_traits) - [initializer_list](https://en.cppreference.com/w/cpp/utility/initializer_list) - [variadic templates / parameter packs](https://en.cppreference.com/w/cpp/language/parameter_pack) - [user defined literals](https://en.cppreference.com/w/cpp/language/user_literal) - [user defined attributes](https://en.cppreference.com/w/cpp/language/attributes) ## Simpler implementation Rcpp is very ambitious, with a number of advanced features, including [modules](https://cran.r-project.org/package=Rcpp/vignettes/Rcpp-modules.pdf), [sugar](https://cran.r-project.org/package=Rcpp/vignettes/Rcpp-sugar.pdf) and extensive support for [attributes](https://CRAN.R-project.org/package=Rcpp/vignettes/Rcpp-attributes.pdf). While these are useful features, many R packages do not use one or any of these advanced features.
In addition the code needed to support these features is complex and can be challenging to maintain. cpp11 takes a more limited scope, providing only the set of r_vector wrappers for R vector types, coercion methods to and from C++ and the limited attributes necessary to support use in R packages. ```{r, eval = FALSE, include = FALSE} # count lines for Rcpp headers (excluding comments) # brew install cloc git clone https://github.com/RcppCore/Rcpp.git cd Rcpp git checkout 1.0.4 cloc inst/include # count lines for Rcpp headers without generated code cloc --fullpath --not-match-f '.*generated.*' inst/include # count lines for cpp11 headers git clone https://github.com/r-lib/cpp11.git cd cpp11 cloc inst/include # get primary authors of Rcpp git ls-files -- inst/include | while read f; do git blame -w --line-porcelain -- "$f" | grep -I '^author '; done | sort -f | uniq -ic | sort -nr ``` This limited scope allows the implementation to be much simpler: the headers in Rcpp 1.0.4 have 74,658 lines of code (excluding blank or commented lines) in 379 files. Some headers in Rcpp are automatically generated; removing these still gives you 25,249 lines of code in 357 files. In contrast the headers in cpp11 contain only 1,734 lines of code in 19 files. This reduction in complexity should make cpp11 an easier project to maintain and ensure correctness, particularly around interactions with the R garbage collector. ## Compilation speed Rcpp always bundles all of its headers together, which causes slow compilation times and high peak memory usage when compiling. The headers in cpp11 are more easily decoupled, so you can include only the particular headers you actually use in a source file. This can significantly improve the compilation speed and memory usage to compile your package. Here are some real examples of the reduction in compile time and peak memory usage after converting packages to cpp11.
```{r, eval = FALSE, include = FALSE} # brew install gtime # CC=gcc-9 CXX=g++-9 CXX11=g++-9 gtime -f %M:%e R CMD INSTALL --libs-only --use-vanilla . ``` | package | Rcpp compile time | cpp11 compile time | Rcpp peak memory | cpp11 peak memory | Rcpp commit | cpp11 commit | | --- | --- | --- | --- | --- | --- | --- | | haven | 17.42s | 7.13s | 428MB | 204MB | [a3cf75a4][haven] | [978cb034][haven] | | readr | 124.13s | 81.08s | 969MB | 684MB | [ec0d8989][readr] | [aa89ff72][readr] | | roxygen2 | 17.34s | 4.24s | 371MB | 109MB | [6f081b75][roxygen2] | [e8e1e22d][roxygen2] | | tidyr | 14.25s | 3.34s | 363MB | 83MB | [3899ed51][tidyr] | [60f7c7d4][tidyr] | [haven]: https://github.com/tidyverse/haven/compare/a3cf75a4...978cb034 [readr]: https://github.com/tidyverse/readr/compare/ec0d8989...aa89ff72 [roxygen2]: https://github.com/r-lib/roxygen2/compare/6f081b75...e8e1e22d [tidyr]: https://github.com/tidyverse/tidyr/compare/3899ed51...60f7c7d4 ## Header only Rcpp has long been a *mostly* [header only](https://en.wikipedia.org/wiki/Header-only) library, however is not a *completely* header only library. There have been [cases](https://github.com/tidyverse/dplyr/issues/2308) when a package was first installed with version X of Rcpp, and then a newer version of Rcpp was later installed. Then when the original package X was loaded R would crash, because the [Application Binary Interface](https://en.wikipedia.org/wiki/Application_binary_interface) of Rcpp had changed between the two versions. Because cpp11 consists of exclusively headers this issue does not occur. ## Vendoring In the go community the concept of [vendoring](https://go.googlesource.com/proposal/+/master/design/25719-go15vendor.md) is widespread. Vendoring means that you copy the code for the dependencies into your project's source tree. This ensures the dependency code is fixed and stable until it is updated. Because cpp11 is fully [header only](#header-only) you can vendor the code in the same way. 
`cpp11::cpp_vendor()` is provided to do this if you choose. Vendoring has advantages and drawbacks, however. The advantage is that changes to the cpp11 project could never break your existing code. The drawbacks are both minor, your package size is now slightly larger, and major, you no longer get bugfixes and new features until you explicitly update cpp11. I think the majority of packages should use `LinkingTo: cpp11` and _not_ vendor the cpp11 dependency. However, vendoring can be appropriate for certain situations. ## Protection cpp11 uses a custom doubly linked list data structure to track objects it is managing. This structure is much more efficient for large numbers of objects than using `R_PreserveObject()` / `R_ReleaseObject()` as is done in Rcpp. ```{r, message = FALSE, eval = should_run_benchmarks()} library(cpp11test) grid <- expand.grid(len = c(10 ^ (2:5), 2e5), pkg = c("cpp11", "rcpp"), stringsAsFactors = FALSE) b_release <- bench::press(.grid = grid, { fun = match.fun(sprintf("%s_release_", pkg)) bench::mark( fun(len), iterations = 1 ) } )[c("len", "pkg", "min")] saveRDS(b_release, "release.Rds", version = 2) ``` ```{r, echo = FALSE, dev = "svg", fig.ext = "svg", eval = capabilities("cairo")} b_release <- readRDS("release.Rds") library(ggplot2) ggplot(b_release, aes(x = len, y = min / len, color = pkg)) + geom_point() + geom_line() + bench::scale_y_bench_time(base = NULL) + scale_x_continuous(labels = scales::comma)+ labs( tite = "cpp11 uses constant time protection", x = "Number of protected objects", y = "Average time to release protection on one object" ) ``` This plot shows the average time to protect and release a given object is essentially constant for cpp11, whereas for Rcpp it is linear or worse in the number of objects being tracked. ```{r, echo = FALSE} knitr::kable(b_release) ``` ## Growing vectors One major difference between Rcpp and cpp11 is how vectors are grown.
Rcpp vectors have a `push_back()` method, but unlike `std::vector` no additional space is reserved when pushing. This makes calling `push_back()` repeatedly very expensive, as the entire vector has to be copied on each call. In contrast `cpp11` vectors grow efficiently, reserving extra space. Because of this you can do ~10,000,000 vector appends with cpp11 in approximately the same amount of time that Rcpp does 10,000, as this benchmark demonstrates. ```{r, message = FALSE, eval = should_run_benchmarks()} grid <- expand.grid(len = 10 ^ (0:7), pkg = "cpp11", stringsAsFactors = FALSE) grid <- rbind( grid, expand.grid(len = 10 ^ (0:4), pkg = "rcpp", stringsAsFactors = FALSE) ) b_grow <- bench::press(.grid = grid, { fun = match.fun(sprintf("%sgrow_", ifelse(pkg == "cpp11", "", paste0(pkg, "_")))) bench::mark( fun(len) ) } )[c("len", "pkg", "min", "mem_alloc", "n_itr", "n_gc")] saveRDS(b_grow, "growth.Rds", version = 2) ``` ```{r, echo = FALSE, dev = "svg", fig.ext = "svg", eval = capabilities("cairo")} b_grow <- readRDS("growth.Rds") library(ggplot2) ggplot(b_grow, aes(x = len, y = min, color = pkg)) + geom_point() + geom_line() + bench::scale_y_bench_time() + scale_x_log10( breaks = scales::trans_breaks("log10", function(x) 10^x), labels = scales::trans_format("log10", scales::math_format(10^.x)) ) + coord_fixed() + theme(panel.grid.minor = element_blank()) + labs(title = "log-log plot of vector size vs construction time", x = NULL, y = NULL) ``` ```{r, echo = FALSE} knitr::kable(b_grow) ``` ## Conclusion Rcpp has been and will continue to be widely successful. cpp11 is an alternative implementation of C++ bindings to R that chooses different design trade-offs and features. Both packages can co-exist (even be used in the same package!) and continue to enrich the R community.
cpp11/vignettes/FAQ.Rmd0000644000175000017500000001421314120423440014406 0ustar nileshnilesh--- title: "FAQ" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{FAQ} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) library(cpp11) ``` Below are some Frequently Asked Questions about cpp11. If you have a question that you think would fit well here please [open an issue](https://github.com/r-lib/cpp11/issues/new/choose). #### 1. What are the underlying types of cpp11 objects? | vector | element | | --- | --- | | cpp11::integers | int | | cpp11::doubles | double | | cpp11::logical | cpp11::r_bool | | cpp11::strings | cpp11::r_string | | cpp11::raws | uint8_t | | cpp11::list | SEXP | #### 2. How do I add elements to a named list? Use the `push_back()` method with the named literal syntax. The named literal syntax is defined in the `cpp11::literals` namespace. ```{cpp11} #include [[cpp11::register]] cpp11::list foo_push() { using namespace cpp11::literals; cpp11::writable::list x; x.push_back({"foo"_nm = 1}); return x; } ``` #### 3. Does cpp11 support default arguments? cpp11 does not support default arguments, while convenient they would require more complexity to support than is currently worthwhile. If you need default argument support you can use a wrapper function around your cpp11 registered function. A common convention is to name the internal function with a trailing `_`. ```{cpp11} #include [[cpp11::register]] double add_some_(double x, double amount) { return x + amount; } ``` ```{r} add_some <- function(x, amount = 1) { add_some_(x, amount) } add_some(1) add_some(1, amount = 5) ``` #### 4. How do I create a new empty list? Define a new writable list object. `cpp11::writable::list x;` #### 5. How do I retrieve (named) elements from a named vector/list? Use the `[]` accessor function. `x["foo"]` #### 6. How can I tell whether a vector is named? 
Use the `named()` method for vector classes. ```{cpp11} #include <cpp11.hpp> [[cpp11::register]] bool is_named(cpp11::strings x) { return x.named(); } ``` ```{r} is_named("foo") is_named(c(x = "foo")) ``` #### 7. How do I return a `cpp11::writable::logicals` object with only a `FALSE` value? You need to use [list initialization](https://en.cppreference.com/w/cpp/language/list_initialization) with `{}` to create the object. ```{cpp11} #include <cpp11.hpp> [[cpp11::register]] cpp11::writable::logicals my_false() { return {FALSE}; } [[cpp11::register]] cpp11::writable::logicals my_true() { return {TRUE}; } [[cpp11::register]] cpp11::writable::logicals my_both() { return {TRUE, FALSE, TRUE}; } ``` ```{r} my_false() my_true() my_both() ``` #### 8. How do I create a new empty environment? To do this you need to call the `base::new.env()` function from C++. This can be done by creating a `cpp11::function` object and then calling it to generate the new environment. ```{cpp11} #include <cpp11.hpp> [[cpp11::register]] cpp11::environment create_environment() { cpp11::function new_env(cpp11::package("base")["new.env"]); return new_env(); } ``` #### 9. How do I assign and retrieve values in an environment? What happens if I try to get a value that doesn't exist? Use `[]` to retrieve or assign values from an environment by name. If a value does not exist it will return `R_UnboundValue`. ```{cpp11} #include <cpp11.hpp> [[cpp11::register]] bool foo_exists(cpp11::environment x) { return x["foo"] != R_UnboundValue; } [[cpp11::register]] void set_foo(cpp11::environment x, double value) { x["foo"] = value; } ``` ```{r} x <- new.env() foo_exists(x) set_foo(x, 1) foo_exists(x) ``` #### 10. How can I create a `cpp11::raws` from a `std::string`? There is no built-in way to do this. One method would be to `push_back()` each element of the string individually.
```{cpp11}
#include <cpp11.hpp>

[[cpp11::register]]
cpp11::raws push_raws() {
  std::string x("hi");
  cpp11::writable::raws out;

  for (auto c : x) {
    out.push_back(c);
  }

  return out;
}
```

```{r}
push_raws()
```

#### 11. How can I create a `std::string` from a `cpp11::writable::strings` element?

Because C++ does not allow for two implicit casts, explicitly cast to `cpp11::r_string` first.

```{cpp11}
#include <cpp11.hpp>
#include <string>

[[cpp11::register]]
std::string my_string() {
  cpp11::writable::strings x({"foo", "bar"});
  std::string elt = cpp11::r_string(x[0]);
  return elt;
}
```

#### 12. What are the types for C++ iterators?

The iterators are `::iterator` classes contained inside the vector classes.
For example the iterator for `cpp11::doubles` would be `cpp11::doubles::iterator` and the iterator for `cpp11::writable::doubles` would be `cpp11::writable::doubles::iterator`.

#### 13. My code has `using namespace std`, why do I still have to include `std::` in the signatures of `[[cpp11::register]]` functions?

The `using namespace std` directive will not be included in the generated code of the function signatures, so they still need to be fully qualified.
However you will _not_ need to qualify the type names within those functions.

The following won't compile

```{cpp11, eval = FALSE}
#include <cpp11.hpp>
#include <string>

using namespace std;

[[cpp11::register]]
string foobar() {
  return string("foo") + "-bar";
}
```

But this will compile and work as intended

```{cpp11}
#include <cpp11.hpp>
#include <string>

using namespace std;

[[cpp11::register]]
std::string foobar() {
  return string("foo") + "-bar";
}
```

#### 14. How do I modify a vector in place?

In-place modification breaks the normal semantics of R code.
In general it should be avoided, which is why `cpp11::writable` classes always copy their data when constructed.

However if you are _positive_ in-place modification is necessary for your use case you can use the move constructor to do this.
```{cpp11} #include [[cpp11::register]] void add_one(cpp11::sexp x_sexp) { cpp11::writable::integers x(std::move(x_sexp.data())); for (auto&& value : x) { ++value; } } ``` ```{r} x <- c(1L, 2L, 3L, 4L) .Internal(inspect(x)) add_one(x) .Internal(inspect(x)) x ``` cpp11/build/0000755000175000017500000000000014151206456012433 5ustar nileshnileshcpp11/build/vignette.rds0000644000175000017500000000047614151206456015001 0ustar nileshnileshRKN0t>-MROG@TaJł8RbGՈF":vⰂ=ϼ-F(| 6P00f(DKrrW'*eu:ĒqE%'eוPDJ(à )*|wHneꈵ}q!3m`qzTU95zu70ѭî]t\M^:ژ':?χ>IE97dL R;ZSۏxBmbŖJu_;J؎,Dpv"cpp11/tests/0000755000175000017500000000000014071114143012466 5ustar nileshnileshcpp11/tests/testthat/0000755000175000017500000000000014151365154014337 5ustar nileshnileshcpp11/tests/testthat/linking_to_incorrect_registers.cpp0000644000175000017500000000035414075321471023340 0ustar nileshnilesh[[cpp11::link_to("progress")]] [[cpp11::register]] void show_progress() { RProgress::RProgress pb("Processing [:bar] ETA: :eta"); pb.tick(0); for (int i = 0; i < 100; i++) { usleep(2.0 / 100 * 1000000); pb.tick(); } } cpp11/tests/testthat/multiple_incorrect.cpp0000644000175000017500000000025014075321471020742 0ustar nileshnilesh[[cpp11::registe]] int foo() { return 1; } [[cpp11::register]] double bar(bool run) { return 1.0; } [[cpp11::reg]] bool baz(bool run, int value = 0) { return true; } cpp11/tests/testthat/test-source.R0000644000175000017500000001317014075321471016740 0ustar nileshnileshtest_that("cpp_source works with the `code` parameter", { skip_on_os("solaris") dll_info <- cpp_source( code = ' #include "cpp11/integers.hpp" [[cpp11::register]] int num_odd(cpp11::integers x) { int total = 0; for (int val : x) { if ((val % 2) == 1) { ++total; } } return total; } ', clean = TRUE) on.exit(dyn.unload(dll_info[["path"]])) expect_equal(num_odd(as.integer(c(1:10, 15, 23))), 7) }) test_that("cpp_source works with the `file` parameter", { skip_on_os("solaris") tf <- tempfile(fileext = ".cpp") writeLines( 
"[[cpp11::register]] bool always_true() { return true; } ", tf) on.exit(unlink(tf)) dll_info <- cpp_source(tf, clean = TRUE, quiet = TRUE) on.exit(dyn.unload(dll_info[["path"]]), add = TRUE) expect_true(always_true()) }) test_that("cpp_source works with files called `cpp11.cpp`", { skip_on_os("solaris") tf <- file.path(tempdir(), "cpp11.cpp") writeLines( "[[cpp11::register]] bool always_true() { return true; } ", tf) on.exit(unlink(tf)) dll_info <- cpp_source(tf, clean = TRUE, quiet = TRUE) on.exit(dyn.unload(dll_info[["path"]]), add = TRUE) expect_true(always_true()) }) test_that("cpp_source returns original file name on error", { expect_output(try(cpp_source(test_path("single_error.cpp"), clean = TRUE), silent = TRUE), normalizePath(test_path("single_error.cpp"), winslash = "/"), fixed = TRUE) #error generated for incorrect attributes is separate from compilation errors expect_error(cpp_source(test_path("single_incorrect.cpp"), clean = TRUE), normalizePath(test_path("single_incorrect.cpp"), winslash = "/"), fixed = TRUE) }) test_that("cpp_source lets you set the C++ standard", { skip_on_os("solaris") skip_on_os("windows") # Older windows toolchains do not support C++14 tf <- tempfile(fileext = ".cpp") writeLines( '#include using namespace std::string_literals; [[cpp11::register]] std::string fun() { auto str = "hello_world"s; return str; } ', tf) on.exit(unlink(tf)) dll_info <- cpp_source(tf, clean = TRUE, quiet = TRUE, cxx_std = "CXX14") on.exit(dyn.unload(dll_info[["path"]]), add = TRUE) expect_equal(fun(), "hello_world") }) test_that("generate_cpp_name works", { expect_equal( generate_cpp_name("foo.cpp"), "foo.cpp" ) expect_equal( generate_cpp_name("foo.cpp", loaded_dlls = "foo"), "foo_2.cpp" ) expect_equal( generate_cpp_name("foo.cpp", loaded_dlls = c("foo", "foo_2")), "foo_3.cpp" ) }) test_that("generate_include_paths handles paths with spaces", { if (is_windows()) { mockery::stub(generate_include_paths, "system.file", "C:\\a path with spaces\\cpp11") 
expect_equal(generate_include_paths("cpp11"), "-I\"C:\\a path with spaces\\cpp11\"") } else { mockery::stub(generate_include_paths, "system.file", "/a path with spaces/cpp11") expect_equal(generate_include_paths("cpp11"), "-I'/a path with spaces/cpp11'") } }) test_that("check_valid_attributes does not return an error if all registers are correct", { expect_error_free( cpp11::cpp_source(clean = TRUE, code = '#include using namespace cpp11::literals; [[cpp11::register]] cpp11::list fn() { cpp11::writable::list x; x.push_back({"foo"_nm = 1}); return x; } [[cpp11::register]] cpp11::list fn2() { cpp11::writable::list x; x.push_back({"foo"_nm = 1}); return x; }')) expect_error_free( cpp11::cpp_source(clean = TRUE, code = '#include #include [[cpp11::linking_to("progress")]] [[cpp11::register]] void show_progress() { RProgress::RProgress pb("Processing [:bar] ETA: :eta"); pb.tick(0); for (int i = 0; i < 100; i++) { usleep(2.0 / 100 * 1000000); pb.tick(); } } ') ) }) test_that("check_valid_attributes returns an error if one or more registers is incorrect", { expect_error( cpp11::cpp_source(code = '#include using namespace cpp11::literals; [[cpp11::reg]] cpp11::list fn() { cpp11::writable::list x; x.push_back({"foo"_nm = 1}); return x; } [[cpp11::register]] cpp11::list fn2() { cpp11::writable::list x; x.push_back({"foo"_nm = 1}); return x; }')) expect_error( cpp11::cpp_source(code = '#include using namespace cpp11::literals; [[cpp11::reg]] cpp11::list fn() { cpp11::writable::list x; x.push_back({"foo"_nm = 1}); return x; }')) expect_error( cpp11::cpp_source(code = '#include using namespace cpp11::literals; [[cpp11::reg]] cpp11::list fn() { cpp11::writable::list x; x.push_back({"foo"_nm = 1}); return x; } [[cpp11::egister]] cpp11::list fn2() { cpp11::writable::list x; x.push_back({"foo"_nm = 1}); return x; }')) expect_error( cpp11::cpp_source( code = ' #include #include [[cpp11::link_to("progress")]] [[cpp11::register]] void show_progress() { RProgress::RProgress 
pb("Processing [:bar] ETA: :eta"); pb.tick(0); for (int i = 0; i < 100; i++) { usleep(2.0 / 100 * 1000000); pb.tick(); } } ')) }) test_that("cpp_source(d) functions work after sourcing file more than once", { cpp11::cpp_source(test_path("single.cpp"), clean = TRUE) expect_equal(foo(), 1) cpp11::cpp_source(test_path("single.cpp"), clean = TRUE) expect_equal(foo(), 1) }) cpp11/tests/testthat/test-vendor.R0000644000175000017500000000141214071114143016721 0ustar nileshnileshdescribe("cpp_vendor", { it("errors if cpp11 is not installed", { pkg <- local_package() mockery::stub(cpp_vendor, "system.file", "") expect_error( cpp_vendor(pkg_path(pkg)), "cpp11 is not installed" ) }) it("errors if cpp11 is already vendored", { pkg <- local_package() cpp_vendor(pkg_path(pkg)) expect_error( cpp_vendor(pkg_path(pkg)), "already exists" ) }) it("vendors cpp11", { pkg <- local_package() p <- pkg_path(pkg) cpp_vendor(pkg_path(pkg)) expect_true(dir.exists(file.path(p, "inst", "include", "cpp11"))) expect_true(file.exists(file.path(p, "inst", "include", "cpp11.hpp"))) expect_true(file.exists(file.path(p, "inst", "include", "cpp11", "declarations.hpp"))) }) }) cpp11/tests/testthat/single_error.cpp0000644000175000017500000000005314075321471017532 0ustar nileshnilesh[[cpp11::register]] int foo() { return 1 } cpp11/tests/testthat/helper.R0000644000175000017500000000155614075321471015747 0ustar nileshnileshlocal_package <- function() { dir <- tempfile() dir.create(dir) withr::defer(unlink(dir, recursive = TRUE), parent.frame()) writeLines("Package: testPkg", file.path(dir, "DESCRIPTION")) writeLines("useDynLib(testPkg, .registration = TRUE)", file.path(dir, "NAMESPACE")) desc::desc(dir) } pkg_path <- function(pkg) { dirname(pkg$.__enclos_env__$private$path) } get_funs <- function(path) { all_decorations <- decor::cpp_decorations(path, is_attribute = TRUE) get_registered_functions(all_decorations, "cpp11::register", quiet = FALSE) } get_package_name <- function(path) { 
desc::desc_get("Package", file = file.path(path, "DESCRIPTION")) } glue_str <- function(...) { glue::as_glue(unlist(list(...))) } read_file <- function(x) { readChar(x, file.size(x)) } expect_error_free <- function(..., regexp = NA) { expect_error(..., regexp = regexp) } cpp11/tests/testthat/single.cpp0000644000175000017500000000005414071114152016312 0ustar nileshnilesh[[cpp11::register]] int foo() { return 1; } cpp11/tests/testthat/test-register.R0000644000175000017500000005652214103317051017263 0ustar nileshnileshdescribe("pkg_links_to_rcpp", { it("works with single package in LinkingTo", { pkg <- local_package() expect_false(pkg_links_to_rcpp(pkg_path(pkg))) pkg$set("LinkingTo", "Rcpp") pkg$write() expect_true(pkg_links_to_rcpp(pkg_path(pkg))) }) it("works with multiple packages in LinkingTo", { pkg <- local_package() expect_false(pkg_links_to_rcpp(pkg_path(pkg))) pkg$set("LinkingTo", paste("Rcpp", "cpp11", sep = ",")) pkg$write() expect_true(pkg_links_to_rcpp(pkg_path(pkg))) }) }) describe("get_call_entries", { it("returns an empty string if there are no R files", { pkg <- local_package() path <- pkg_path(pkg) expect_equal(get_call_entries(path, get_funs(path)$name, get_package_name(path)), "") }) it("returns an empty string if there are no .Call calls", { pkg <- local_package() path <- pkg_path(pkg) dir.create(file.path(path, "R")) writeLines("foo <- function() 1", file.path(path, "R", "foo.R")) expect_equal(get_call_entries(path, get_funs(path)$name, get_package_name(path)), "") }) it("Errors for invalid packages", { # local_package adds a NAMESPACE file pkg <- tempfile() dir.create(pkg) on.exit(unlink(pkg, recursive = TRUE)) writeLines("Package: testPkg", file.path(pkg, "DESCRIPTION")) dir.create(file.path(pkg, "R")) writeLines('foo <- function() .Call("bar")', file.path(pkg, "R", "foo.R")) expect_error(get_call_entries(pkg, get_funs(path)$name, get_package_name(pkg)), "has no 'NAMESPACE' file") }) it("returns an empty string for packages with .Call entries 
and NAMESPACE files", { # tools::package_native_routine_registration_skeleton is not available before R 3.4 skip_if(getRversion() < "3.4") pkg <- local_package() path <- pkg_path(pkg) dir.create(file.path(path, "R")) writeLines('foo <- function() .Call("bar")', file.path(path, "R", "foo.R")) expect_equal( get_call_entries(path, get_funs(path)$name, get_package_name(path)), c("/* .Call calls */", "extern SEXP bar();", "", "static const R_CallMethodDef CallEntries[] = {", " {\"bar\", (DL_FUNC) &bar, 0},", " {NULL, NULL, 0}", "};" ) ) }) it("works with multiple register functions.", { pkg <- local_package() p <- pkg_path(pkg) dir.create(file.path(p, "src")) file.copy(test_path("multiple.cpp"), file.path(p, "src", "multiple.cpp")) cpp_register(p) cpp_bindings <- file.path(p, "src", "cpp11.cpp") expect_equal(read_file(cpp_bindings), "// Generated by cpp11: do not edit by hand // clang-format off #include \"cpp11/declarations.hpp\" #include // multiple.cpp int foo(); extern \"C\" SEXP _testPkg_foo() { BEGIN_CPP11 return cpp11::as_sexp(foo()); END_CPP11 } // multiple.cpp double bar(bool run); extern \"C\" SEXP _testPkg_bar(SEXP run) { BEGIN_CPP11 return cpp11::as_sexp(bar(cpp11::as_cpp>(run))); END_CPP11 } // multiple.cpp bool baz(bool run, int value); extern \"C\" SEXP _testPkg_baz(SEXP run, SEXP value) { BEGIN_CPP11 return cpp11::as_sexp(baz(cpp11::as_cpp>(run), cpp11::as_cpp>(value))); END_CPP11 } extern \"C\" { static const R_CallMethodDef CallEntries[] = { {\"_testPkg_bar\", (DL_FUNC) &_testPkg_bar, 1}, {\"_testPkg_baz\", (DL_FUNC) &_testPkg_baz, 2}, {\"_testPkg_foo\", (DL_FUNC) &_testPkg_foo, 0}, {NULL, NULL, 0} }; } extern \"C\" attribute_visible void R_init_testPkg(DllInfo* dll){ R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); R_useDynamicSymbols(dll, FALSE); R_forceSymbols(dll, TRUE); } ") }) }) describe("wrap_call", { it("works with void functions and no arguments", { expect_equal( wrap_call("foo", "void", tibble::tibble(type = character(), name = 
character())), " foo();\n return R_NilValue;" ) }) it("works with non-void functions and no arguments", { expect_equal( wrap_call("foo", "bool", tibble::tibble(type = character(), name = character())), " return cpp11::as_sexp(foo());" ) }) it("works with void functions and some arguments", { expect_equal( wrap_call("foo", "void", tibble::tibble(type = c("double", "int"), name = c("x", "y"))), " foo(cpp11::as_cpp>(x), cpp11::as_cpp>(y));\n return R_NilValue;" ) }) it("works with non-void functions and some arguments", { expect_equal( wrap_call("foo", "bool", tibble::tibble(type = c("double", "int"), name = c("x", "y"))), " return cpp11::as_sexp(foo(cpp11::as_cpp>(x), cpp11::as_cpp>(y)));" ) }) }) describe("get_registered_functions", { it("returns an empty tibble given a non-existent file", { f <- tempfile() decorations <- decor::cpp_decorations(files = f, is_attribute = TRUE) res <- get_registered_functions(decorations, "cpp11::register") expect_equal(names(res), c("file", "line", "decoration", "params", "context", "name", "return_type", "args")) expect_equal(NROW(res), 0) }) it("returns an empty tibble given a empty file", { f <- tempfile() file.create(f) decorations <- decor::cpp_decorations(files = f, is_attribute = TRUE) res <- get_registered_functions(decorations, "cpp11::register") expect_equal(names(res), c("file", "line", "decoration", "params", "context", "name", "return_type", "args")) expect_equal(NROW(res), 0) }) it("works with a single registration", { decorations <- decor::cpp_decorations(files = test_path("single.cpp"), is_attribute = TRUE) res <- get_registered_functions(decorations, "cpp11::register") expect_equal(names(res), c("file", "line", "decoration", "params", "context", "name", "return_type", "args")) expect_equal(NROW(res), 1L) expect_equal(res$name, "foo") expect_equal(res$return_type, "int") expect_equal(names(res$args[[1]]), c("type", "name", "default")) expect_equal(NROW(res$args[[1]]), 0) }) it("works with multiple registrations", { 
decorations <- decor::cpp_decorations(files = test_path("multiple.cpp"), is_attribute = TRUE) res <- get_registered_functions(decorations, "cpp11::register") expect_equal(names(res), c("file", "line", "decoration", "params", "context", "name", "return_type", "args")) expect_equal(NROW(res), 3L) expect_equal(res$name, c("foo", "bar", "baz")) expect_equal(res$return_type, c("int", "double", "bool")) expect_equal(names(res$args[[1]]), c("type", "name", "default")) expect_equal(NROW(res$args[[1]]), 0) expect_equal(names(res$args[[2]]), c("type", "name", "default")) expect_equal(NROW(res$args[[2]]), 1) expect_equal(res$args[[2]]$type, "bool") expect_equal(res$args[[2]]$name, "run") expect_equal(res$args[[2]]$default, NA_character_) expect_equal(names(res$args[[3]]), c("type", "name", "default")) expect_equal(NROW(res$args[[3]]), 2) expect_equal(res$args[[3]]$type, c("bool", "int")) expect_equal(res$args[[3]]$name, c("run", "value")) expect_equal(res$args[[3]]$default, c(NA_character_, "0")) }) }) describe("generate_cpp_functions", { it("returns the empty string if there are no functions", { funs <- tibble::tibble( file = character(), line = integer(), decoration = character(), params = list(), context = list(), name = character(), return_type = character(), args = list(tibble::tibble(type = character(), name = character())) ) expect_equal(generate_cpp_functions(funs), character()) }) it("returns the wrapped function for a single void function with no arguments", { funs <- tibble::tibble( file = "foo.cpp", line = 1L, decoration = "cpp11", params = list(NA), context = list(NA_character_), name = "foo", return_type = "void", args = list(tibble::tibble(type = character(), name = character())) ) expect_equal(generate_cpp_functions(funs), "// foo.cpp void foo(); extern \"C\" SEXP _cpp11_foo() { BEGIN_CPP11 foo(); return R_NilValue; END_CPP11 }" ) }) it("returns the wrapped function for a single void function with no arguments and different package name", { funs <- 
tibble::tibble( file = "foo.cpp", line = 1L, decoration = "cpp11", params = list(NA), context = list(NA_character_), name = "foo", return_type = "void", args = list(tibble::tibble(type = character(), name = character())) ) expect_equal(generate_cpp_functions(funs, package = "mypkg"), "// foo.cpp void foo(); extern \"C\" SEXP _mypkg_foo() { BEGIN_CPP11 foo(); return R_NilValue; END_CPP11 }" ) }) it("returns the wrapped function for a single function with no arguments", { funs <- tibble::tibble( file = "foo.cpp", line = 1L, decoration = "cpp11", params = list(NA), context = list(NA_character_), name = "foo", return_type = "int", args = list(tibble::tibble(type = character(), name = character())) ) expect_equal(generate_cpp_functions(funs), "// foo.cpp int foo(); extern \"C\" SEXP _cpp11_foo() { BEGIN_CPP11 return cpp11::as_sexp(foo()); END_CPP11 }" ) }) it("returns the wrapped function for a single void function with arguments", { funs <- tibble::tibble( file = "foo.cpp", line = 1L, decoration = "cpp11", params = list(NA), context = list(NA_character_), name = "foo", return_type = "void", args = list(tibble::tibble(type = "int", name = "bar")) ) expect_equal(generate_cpp_functions(funs), "// foo.cpp void foo(int bar); extern \"C\" SEXP _cpp11_foo(SEXP bar) { BEGIN_CPP11 foo(cpp11::as_cpp>(bar)); return R_NilValue; END_CPP11 }" ) }) it("returns the wrapped function for a single function with arguments", { funs <- tibble::tibble( file = "foo.cpp", line = 1L, decoration = "cpp11", params = list(NA), context = list(NA_character_), name = "foo", return_type = "int", args = list(tibble::tibble(type = "int", name = "bar")) ) expect_equal(generate_cpp_functions(funs), "// foo.cpp int foo(int bar); extern \"C\" SEXP _cpp11_foo(SEXP bar) { BEGIN_CPP11 return cpp11::as_sexp(foo(cpp11::as_cpp>(bar))); END_CPP11 }" ) }) it("returns the wrapped functions for multiple functions with arguments", { funs <- tibble::tibble( file = c("foo.cpp", "bar.cpp"), line = c(1L, 3L), decoration = 
c("cpp11", "cpp11"), params = list(NA, NA), context = list(NA_character_, NA_character_), name = c("foo", "bar"), return_type = c("int", "bool"), args = list( tibble::tibble(type = "int", name = "bar"), tibble::tibble(type = "double", name = "baz") ) ) expect_equal(generate_cpp_functions(funs), "// foo.cpp int foo(int bar); extern \"C\" SEXP _cpp11_foo(SEXP bar) { BEGIN_CPP11 return cpp11::as_sexp(foo(cpp11::as_cpp>(bar))); END_CPP11 } // bar.cpp bool bar(double baz); extern \"C\" SEXP _cpp11_bar(SEXP baz) { BEGIN_CPP11 return cpp11::as_sexp(bar(cpp11::as_cpp>(baz))); END_CPP11 }" ) }) }) describe("generate_r_functions", { it("returns the empty string if there are no functions", { funs <- tibble::tibble( file = character(), line = integer(), decoration = character(), params = list(), context = list(), name = character(), return_type = character(), args = list() ) expect_equal(generate_r_functions(funs), character()) }) it("returns the wrapped function for a single void function with no arguments", { funs <- tibble::tibble( file = "foo.cpp", line = 1L, decoration = "cpp11", params = list(NA), context = list(NA_character_), name = "foo", return_type = "void", args = list(tibble::tibble(type = character(), name = character())) ) expect_equal(generate_r_functions(funs, package = "cpp11"), "foo <- function() { invisible(.Call(`_cpp11_foo`)) }") }) it("returns the wrapped function for a single void function with no arguments and use_package = TRUE", { funs <- tibble::tibble( file = "foo.cpp", line = 1L, decoration = "cpp11", params = list(NA), context = list(NA_character_), name = "foo", return_type = "void", args = list(tibble::tibble(type = character(), name = character())) ) expect_equal(generate_r_functions(funs, package = "cpp11", use_package = TRUE), "foo <- function() { invisible(.Call(\"_cpp11_foo\", PACKAGE = \"cpp11\")) }") }) it("returns the wrapped function for a single void function with no arguments and different package name", { funs <- tibble::tibble( 
file = "foo.cpp", line = 1L, decoration = "cpp11", params = list(NA), context = list(NA_character_), name = "foo", return_type = "void", args = list(tibble::tibble(type = character(), name = character())) ) expect_equal(generate_r_functions(funs, package = "mypkg"), "foo <- function() { invisible(.Call(`_mypkg_foo`)) }") }) it("returns the wrapped function for a single function with no arguments", { funs <- tibble::tibble( file = "foo.cpp", line = 1L, decoration = "cpp11", params = list(NA), context = list(NA_character_), name = "foo", return_type = "int", args = list(tibble::tibble(type = character(), name = character())) ) expect_equal(generate_r_functions(funs, package = "cpp11"), "foo <- function() { .Call(`_cpp11_foo`) }") }) it("returns the wrapped function for a single function with no arguments and use_package = TRUE", { funs <- tibble::tibble( file = "foo.cpp", line = 1L, decoration = "cpp11", params = list(NA), context = list(NA_character_), name = "foo", return_type = "int", args = list(tibble::tibble(type = character(), name = character())) ) expect_equal(generate_r_functions(funs, package = "cpp11", use_package = TRUE), "foo <- function() { .Call(\"_cpp11_foo\", PACKAGE = \"cpp11\") }") }) it("returns the wrapped function for a single void function with arguments", { funs <- tibble::tibble( file = "foo.cpp", line = 1L, decoration = "cpp11", params = list(NA), context = list(NA_character_), name = "foo", return_type = "void", args = list(tibble::tibble(type = "int", name = "bar")) ) expect_equal(generate_r_functions(funs, package = "cpp11"), "foo <- function(bar) { invisible(.Call(`_cpp11_foo`, bar)) }") }) it("returns the wrapped function for a single function with arguments", { funs <- tibble::tibble( file = "foo.cpp", line = 1L, decoration = "cpp11", params = list(NA), context = list(NA_character_), name = "foo", return_type = "int", args = list(tibble::tibble(type = "int", name = "bar")) ) expect_equal(generate_r_functions(funs, package = "cpp11"), 
"foo <- function(bar) { .Call(`_cpp11_foo`, bar) }") }) it("returns the wrapped functions for multiple functions with arguments", { funs <- tibble::tibble( file = c("foo.cpp", "bar.cpp"), line = c(1L, 3L), decoration = c("cpp11", "cpp11"), params = list(NA, NA), context = list(NA_character_, NA_character_), name = c("foo", "bar"), return_type = c("int", "bool"), args = list( tibble::tibble(type = "int", name = "bar"), tibble::tibble(type = "double", name = "baz") ) ) expect_equal(generate_r_functions(funs, package = "cpp11"), "foo <- function(bar) { .Call(`_cpp11_foo`, bar) } bar <- function(baz) { .Call(`_cpp11_bar`, baz) }") }) }) describe("cpp_register", { it("returns an invisible empty character if there are no decorations", { f <- tempfile() expect_equal(cpp_register(f), character()) dir.create(f) expect_equal(cpp_register(f), character()) }) it("works with a package that registers a single c++ function", { # tools::package_native_routine_registration_skeleton is not available before R 3.4 skip_if(getRversion() < "3.4") pkg <- local_package() p <- pkg_path(pkg) dir.create(file.path(p, "src")) file.copy(test_path("single.cpp"), file.path(p, "src", "single.cpp")) cpp_register(p) r_bindings <- file.path(p, "R", "cpp11.R") expect_true(file.exists(r_bindings)) expect_equal(read_file(r_bindings), "# Generated by cpp11: do not edit by hand foo <- function() { .Call(`_testPkg_foo`) } ") cpp_bindings <- file.path(p, "src", "cpp11.cpp") expect_true(file.exists(cpp_bindings)) expect_equal(read_file(cpp_bindings), "// Generated by cpp11: do not edit by hand // clang-format off #include \"cpp11/declarations.hpp\" #include // single.cpp int foo(); extern \"C\" SEXP _testPkg_foo() { BEGIN_CPP11 return cpp11::as_sexp(foo()); END_CPP11 } extern \"C\" { static const R_CallMethodDef CallEntries[] = { {\"_testPkg_foo\", (DL_FUNC) &_testPkg_foo, 0}, {NULL, NULL, 0} }; } extern \"C\" attribute_visible void R_init_testPkg(DllInfo* dll){ R_registerRoutines(dll, NULL, CallEntries, 
NULL, NULL); R_useDynamicSymbols(dll, FALSE); R_forceSymbols(dll, TRUE); } ") }) it("can be run without messages", { pkg <- local_package() p <- pkg_path(pkg) dir.create(file.path(p, "src")) file.copy(test_path("single.cpp"), file.path(p, "src", "single.cpp")) expect_silent(cpp_register(p, quiet = TRUE)) }) it("can be run with messages, by default", { pkg <- local_package() p <- pkg_path(pkg) dir.create(file.path(p, "src")) file.copy(test_path("single.cpp"), file.path(p, "src", "single.cpp")) expect_message(cpp_register(p), "1 functions decorated with [[cpp11::register]]", fixed = TRUE) }) it("includes pkg_types.h if included in src", { pkg <- local_package() p <- pkg_path(pkg) dir.create(file.path(p, "src")) file.copy(test_path("single.cpp"), file.path(p, "src", "single.cpp")) writeLines("#include ", file.path(p, "src", "testPkg_types.h")) cpp_register(p) expect_true( any( grepl( pattern = '#include "testPkg_types.h"', x = readLines(file.path(p, "src", "cpp11.cpp")), fixed = TRUE ) ) ) }) it("includes pkg_types.hpp if included in src", { pkg <- local_package() p <- pkg_path(pkg) dir.create(file.path(p, "src")) file.copy(test_path("single.cpp"), file.path(p, "src", "single.cpp")) writeLines("#include ", file.path(p, "src", "testPkg_types.hpp")) cpp_register(p) expect_true( any( grepl( pattern = '#include "testPkg_types.hpp"', x = readLines(file.path(p, "src", "cpp11.cpp")), fixed = TRUE ) ) ) }) it("includes pkg_types.h if included in inst/include", { pkg <- local_package() p <- pkg_path(pkg) dir.create(file.path(p, "src")) file.copy(test_path("single.cpp"), file.path(p, "src", "single.cpp")) dir.create(file.path(p, "inst", "include"), recursive = TRUE) writeLines("#include ", file.path(p, "inst", "include", "testPkg_types.h")) cpp_register(p) expect_true( any( grepl( pattern = '#include "testPkg_types.h"', x = readLines(file.path(p, "src", "cpp11.cpp")), fixed = TRUE ) ) ) }) it("includes pkg_types.hpp if included in inst/include", { pkg <- local_package() p <- 
pkg_path(pkg) dir.create(file.path(p, "src")) file.copy(test_path("single.cpp"), file.path(p, "src", "single.cpp")) dir.create(file.path(p, "inst", "include"), recursive = TRUE) writeLines("#include ", file.path(p, "inst", "include", "testPkg_types.hpp")) cpp_register(p) expect_true( any( grepl( pattern = '#include "testPkg_types.hpp"', x = readLines(file.path(p, "src", "cpp11.cpp")), fixed = TRUE ) ) ) }) it("does not error if no files have registered functions", { pkg <- local_package() p <- pkg_path(pkg) dir.create(file.path(p, "src")) writeLines("int foo(int x) { return x; }", file.path(p, "src", "foo.cpp")) expect_error_free(cpp_register(p)) }) }) describe("generate_init_functions", { it("returns an empty list if there no functions", { funs <- tibble::tibble( file = character(), line = integer(), decoration = character(), params = list(), context = list(), name = character(), return_type = character(), args = list(tibble::tibble(type = character(), name = character())) ) expect_equal(generate_init_functions(funs), list(declarations = "", calls = "")) }) it("returns the declaration and call for a single init function", { funs <- tibble::tibble( file = "foo.cpp", line = 1L, decoration = "cpp11", params = list(NA), context = list(NA_character_), name = "foo", return_type = "void", args = list(tibble::tibble(type = "DllInfo*", name = "dll")) ) expect_equal(generate_init_functions(funs), list(declarations = "\nvoid foo(DllInfo* dll);\n", calls = "\n foo(dll);")) }) it("returns the declaration and call for a multiple init functions", { funs <- tibble::tibble( file = c("foo.cpp", "bar.cpp"), line = c(1L, 3L), decoration = c("cpp11", "cpp11"), params = list(NA, NA), context = list(NA_character_, NA_character_), name = c("foo", "bar"), return_type = c("void", "void"), args = list(tibble::tibble(type = "DllInfo*", name = "dll"), tibble::tibble(type = "DllInfo*", name = "dll")) ) expect_equal(generate_init_functions(funs), list(declarations = "\nvoid foo(DllInfo* 
dll);\nvoid bar(DllInfo* dll);\n", calls = "\n foo(dll);\n bar(dll);")) }) }) test_that("check_valid_attributes does not return an error if all registers are correct", { pkg <- local_package() p <- pkg_path(pkg) dir.create(file.path(p, "src")) file.copy(test_path("single.cpp"), file.path(p, "src", "single.cpp")) expect_error_free(cpp_register(p)) pkg <- local_package() p <- pkg_path(pkg) dir.create(file.path(p, "src")) file.copy(test_path("multiple.cpp"), file.path(p, "src", "multiple.cpp")) expect_error_free(cpp_register(p)) pkg <- local_package() p <- pkg_path(pkg) dir.create(file.path(p, "src")) file.copy(test_path("linking_to_registers.cpp"), file.path(p, "src", "linking_to_registers.cpp")) expect_error_free(cpp_register(p)) }) test_that("check_valid_attributes returns an error if one or more registers is incorrect", { pkg <- local_package() p <- pkg_path(pkg) dir.create(file.path(p, "src")) file.copy(test_path("single_incorrect.cpp"), file.path(p, "src", "single_incorrect.cpp")) expect_error(cpp_register(p)) pkg <- local_package() p <- pkg_path(pkg) dir.create(file.path(p, "src")) file.copy(test_path("multiple_incorrect.cpp"), file.path(p, "src", "multiple_incorrect.cpp")) expect_error(cpp_register(p)) pkg <- local_package() p <- pkg_path(pkg) dir.create(file.path(p, "src")) file.copy(test_path("linking_to_incorrect_registers.cpp"), file.path(p, "src", "linking_to_incorrect_registers.cpp")) expect_error(cpp_register(p)) }) cpp11/tests/testthat/linking_to_registers.cpp0000644000175000017500000000035714075321471021273 0ustar nileshnilesh[[cpp11::linking_to("progress")]] [[cpp11::register]] void show_progress() { RProgress::RProgress pb("Processing [:bar] ETA: :eta"); pb.tick(0); for (int i = 0; i < 100; i++) { usleep(2.0 / 100 * 1000000); pb.tick(); } } cpp11/tests/testthat/test-utils.R0000644000175000017500000000153014071114152016565 0ustar nileshnileshdescribe("cli_suppress", { it("suppresses cli outputs", { f <- function() cli::cli_text("hi") 
expect_message(f(), "hi") expect_message(cli_suppress(f()), NA) }) }) describe("glue_collapse_data", { it("works with empty inputs", { expect_equal( glue_collapse_data(mtcars, ""), "" ) expect_equal( glue_collapse_data(mtcars[FALSE, ], "{hp}"), "" ) }) it("works with non-empty inputs", { expect_equal( glue_collapse_data(mtcars[1, ], "{hp}"), "110" ) expect_equal( glue_collapse_data(mtcars[1:2, ], "{hp}"), "110, 110" ) }) }) describe("stop_unless_installed", { mockery::stub(stop_unless_installed, "requireNamespace", FALSE) expect_error( stop_unless_installed("foo"), "The foo package\\(s\\) are required for this functionality" ) }) cpp11/tests/testthat/single_incorrect.cpp0000644000175000017500000000005314075321471020371 0ustar nileshnilesh[[cpp11::registe]] int foo() { return 1; } cpp11/tests/testthat/multiple.cpp0000644000175000017500000000025614071114152016670 0ustar nileshnilesh[[cpp11::register]] int foo() { return 1; } [[cpp11::register]] double bar(bool run) { return 1.0; } [[cpp11::register]] bool baz(bool run, int value = 0) { return true; } cpp11/tests/testthat/test-knitr.R0000644000175000017500000000114514071114152016556 0ustar nileshnileshdescribe("eng_cpp11", { it("works when code is not evaluated", { skip_on_os("solaris") opts <- knitr::opts_chunk$get() opts <- utils::modifyList(opts, list(eval = FALSE, engine = "cpp11", code = "1 + 1")) expect_equal( eng_cpp11(opts), "1 + 1" ) }) it("works when code is evaluated", { skip_on_os("solaris") opts <- knitr::opts_chunk$get() code <- "[[cpp11::register]] int foo() { return 0; }" opts <- utils::modifyList(opts, list(eval = TRUE, engine = "cpp11", code = code, quiet = TRUE)) expect_equal( eng_cpp11(opts), code ) }) }) cpp11/tests/testthat.R0000644000175000017500000000006614071114143014453 0ustar nileshnileshlibrary(testthat) library(cpp11) test_check("cpp11") cpp11/R/0000755000175000017500000000000014151206456011535 5ustar nileshnileshcpp11/R/source.R0000644000175000017500000001567614134540114013170 0ustar 
nileshnilesh#' Compile C++ code #' #' [cpp_source()] compiles and loads a single C++ file for use in R. #' [cpp_function()] compiles and loads a single function for use in R. #' [cpp_eval()] evaluates a single C++ expression and returns the result. #' #' Within C++ code you can use `[[cpp11::linking_to("pkgxyz")]]` to link to #' external packages. This is equivalent to putting those packages in the #' `LinkingTo` field in a package DESCRIPTION. #' #' @param file A file containing C++ code to compile #' @param code If non-null, the C++ code to compile #' @param env The R environment where the R wrapping functions should be defined. #' @param clean If `TRUE`, cleanup the files after sourcing #' @param quiet If `TRUE`, do not show compiler output #' @param cxx_std The C++ standard to use, the `CXX_STD` make macro is set to #' this value. The default value queries the `CXX_STD` environment variable, or #' uses 'CXX11' if unset. #' @param dir The directory to store the generated source files. `tempfile()` is #' used by default. The directory will be removed if `clean` is `TRUE`. #' @return For [cpp_source()] and [cpp_function()] the results of #' [dyn.load()] (invisibly). For [cpp_eval()] the results of the evaluated #' expression. 
#' @examples #' #' cpp_source( #' code = '#include "cpp11/integers.hpp" #' #' [[cpp11::register]] #' int num_odd(cpp11::integers x) { #' int total = 0; #' for (int val : x) { #' if ((val % 2) == 1) { #' ++total; #' } #' } #' return total; #' } #' ') #' #' num_odd(as.integer(c(1:10, 15, 23))) #' #' if (interactive() && require("progress")) { #' #' cpp_source( #' code = ' #' #include #' #include #' #' [[cpp11::linking_to("progress")]] #' #' [[cpp11::register]] void #' show_progress() { #' RProgress::RProgress pb("Processing [:bar] ETA: :eta"); #' #' pb.tick(0); #' for (int i = 0; i < 100; i++) { #' usleep(2.0 / 100 * 1000000); #' pb.tick(); #' } #' } #' ') #' #' show_progress() #' } #' #' @export cpp_source <- function(file, code = NULL, env = parent.frame(), clean = TRUE, quiet = TRUE, cxx_std = Sys.getenv("CXX_STD", "CXX11"), dir = tempfile()) { stop_unless_installed(c("brio", "callr", "cli", "decor", "desc", "glue", "tibble", "vctrs")) dir.create(dir, showWarnings = FALSE, recursive = TRUE) dir.create(file.path(dir, "R"), showWarnings = FALSE) dir.create(file.path(dir, "src"), showWarnings = FALSE) if (!is.null(code)) { tf <- tempfile(pattern = "code_", fileext = ".cpp") file <- tf if (isTRUE(clean)) { on.exit(unlink(tf)) } brio::write_lines(code, file) } if (!any(tools::file_ext(file) %in% c("cpp", "cc"))) { stop("`file` must have a `.cpp` or `.cc` extension") } name <- generate_cpp_name(file) package <- tools::file_path_sans_ext(name) orig_dir <- normalizePath(dirname(file), winslash = "/") new_dir <- normalizePath(file.path(dir, "src"), winslash = "/") # file now points to another location file.copy(file, file.path(new_dir, name)) #change variable name to reflect this new_file_path <- file.path(new_dir, name) new_file_name <- basename(new_file_path) orig_file_path <- file.path(orig_dir, new_file_name) suppressWarnings( all_decorations <- decor::cpp_decorations(dir, is_attribute = TRUE) ) #provide original path for error messages 
check_valid_attributes(all_decorations, file = orig_file_path) cli_suppress( funs <- get_registered_functions(all_decorations, "cpp11::register") ) cpp_functions_definitions <- generate_cpp_functions(funs, package = package) cpp_path <- file.path(dirname(new_file_path), "cpp11.cpp") brio::write_lines(c('#include "cpp11/declarations.hpp"', "using namespace ::cpp11;", cpp_functions_definitions), cpp_path) linking_to <- union(get_linking_to(all_decorations), "cpp11") includes <- generate_include_paths(linking_to) if (isTRUE(clean)) { on.exit(unlink(dir, recursive = TRUE), add = TRUE) } r_functions <- generate_r_functions(funs, package = package, use_package = TRUE) makevars_content <- generate_makevars(includes, cxx_std) brio::write_lines(makevars_content, file.path(new_dir, "Makevars")) source_files <- normalizePath(c(new_file_path, cpp_path), winslash = "/") res <- callr::rcmd("SHLIB", source_files, user_profile = TRUE, show = !quiet, wd = new_dir) if (res$status != 0) { error_messages <- res$stderr # Substitute temporary file path with original file path error_messages <- gsub(tools::file_path_sans_ext(new_file_path), tools::file_path_sans_ext(orig_file_path), error_messages, fixed = TRUE) cat(error_messages) stop("Compilation failed.", call. 
= FALSE) } shared_lib <- file.path(dir, "src", paste0(tools::file_path_sans_ext(new_file_name), .Platform$dynlib.ext)) r_path <- file.path(dir, "R", "cpp11.R") brio::write_lines(r_functions, r_path) source(r_path, local = env) dyn.load(shared_lib, local = TRUE, now = TRUE) } the <- new.env(parent = emptyenv()) the$count <- 0L generate_cpp_name <- function(name, loaded_dlls = c("cpp11", names(getLoadedDLLs()))) { ext <- tools::file_ext(name) root <- tools::file_path_sans_ext(basename(name)) count <- 2 new_name <- root while(new_name %in% loaded_dlls) { new_name <- sprintf("%s_%i", root, count) count <- count + 1 } sprintf("%s.%s", new_name, ext) } generate_include_paths <- function(packages) { out <- character(length(packages)) for (i in seq_along(packages)) { path <- system.file(package = packages[[i]], "include") if (is_windows()) { path <- utils::shortPathName(path) } out[[i]] <- paste0("-I", shQuote(path)) } out } generate_makevars <- function(includes, cxx_std) { c(sprintf("CXX_STD=%s", cxx_std), sprintf("PKG_CPPFLAGS=%s", paste0(includes, collapse = " "))) } #' @rdname cpp_source #' @export cpp_function <- function(code, env = parent.frame(), clean = TRUE, quiet = TRUE, cxx_std = Sys.getenv("CXX_STD", "CXX11")) { cpp_source(code = paste(c('#include "cpp11.hpp"', "using namespace ::cpp11;", "namespace writable = ::cpp11::writable;", "[[cpp11::register]]", code), collapse = "\n"), env = env, clean = clean, quiet = quiet, cxx_std = cxx_std ) } utils::globalVariables("f") #' @rdname cpp_source #' @export cpp_eval <- function(code, env = parent.frame(), clean = TRUE, quiet = TRUE, cxx_std = Sys.getenv("CXX_STD", "CXX11")) { cpp_source(code = paste(c('#include "cpp11.hpp"', "using namespace ::cpp11;", "namespace writable = ::cpp11::writable;", "[[cpp11::register]]", "SEXP f() { return as_sexp(", code, ");", "}"), collapse = "\n"), env = env, clean = clean, quiet = quiet, cxx_std = cxx_std ) f() } get_linking_to <- function(decorations) { out <- 
decorations[decorations$decoration == "cpp11::linking_to", ] if (NROW(decorations) == 0) { return(character()) } gsub("\"", "", as.character(unlist(out$params))) } cpp11/R/register.R0000644000175000017500000002332714120423675013513 0ustar nileshnilesh#' Generates wrappers for registered C++ functions #' #' Functions decorated with `[[cpp11::register]]` in files ending in `.cc`, #' `.cpp`, `.h` or `.hpp` will be wrapped in generated code and registered to #' be called from R. #' #' Note registered functions will not be *exported* from your package unless #' you also add a `@export` roxygen2 directive for them. #' #' In order to use `cpp_register()` the `cli`, `decor`, `desc`, `glue`, #' `tibble` and `vctrs` packages must also be installed. #' @param path The path to the package root directory #' @param quiet If `TRUE` suppresses output from this function #' @return The paths to the generated R and C++ source files (in that order). #' @export #' @examples #' # create a minimal package #' dir <- tempfile() #' dir.create(dir) #' #' writeLines("Package: testPkg", file.path(dir, "DESCRIPTION")) #' writeLines("useDynLib(testPkg, .registration = TRUE)", file.path(dir, "NAMESPACE")) #' #' # create a C++ file with a decorated function #' dir.create(file.path(dir, "src")) #' writeLines("[[cpp11::register]] int one() { return 1; }", file.path(dir, "src", "one.cpp")) #' #' # register the functions in the package #' cpp_register(dir) #' #' # Files generated by registration #' file.exists(file.path(dir, "R", "cpp11.R")) #' file.exists(file.path(dir, "src", "cpp11.cpp")) #' #' # cleanup #' unlink(dir, recursive = TRUE) cpp_register <- function(path = ".", quiet = FALSE) { stop_unless_installed(get_cpp_register_needs()) r_path <- file.path(path, "R", "cpp11.R") cpp_path <- file.path(path, "src", "cpp11.cpp") unlink(c(r_path, cpp_path)) suppressWarnings( all_decorations <- decor::cpp_decorations(path, is_attribute = TRUE) ) if (nrow(all_decorations) == 0) { 
return(invisible(character())) } check_valid_attributes(all_decorations) funs <- get_registered_functions(all_decorations, "cpp11::register", quiet) package <- desc::desc_get("Package", file = file.path(path, "DESCRIPTION")) cpp_functions_definitions <- generate_cpp_functions(funs, package) init <- generate_init_functions(get_registered_functions(all_decorations, "cpp11::init", quiet)) r_functions <- generate_r_functions(funs, package, use_package = FALSE) dir.create(dirname(r_path), recursive = TRUE, showWarnings = FALSE) brio::write_lines(path = r_path, glue::glue(' # Generated by cpp11: do not edit by hand {r_functions} ' )) if (!quiet) { cli::cli_alert_success("generated file {.file {basename(r_path)}}") } call_entries <- get_call_entries(path, funs$name, package) cpp_function_registration <- glue::glue_data(funs, ' {{ "_cpp11_{name}", (DL_FUNC) &_{package}_{name}, {n_args}}}, ', n_args = viapply(funs$args, nrow) ) cpp_function_registration <- glue::glue_collapse(cpp_function_registration, sep = "\n") extra_includes <- character() if (pkg_links_to_rcpp(path)) { extra_includes <- c(extra_includes, "#include ", "#include ", "using namespace Rcpp;") } pkg_types <- c( file.path(path, "src", paste0(package, "_types.h")), file.path(path, "src", paste0(package, "_types.hpp")), file.path(path, "inst", "include", paste0(package, "_types.h")), file.path(path, "inst", "include", paste0(package, "_types.hpp")) ) pkg_types_exist <- file.exists(pkg_types) if (any(pkg_types_exist)) { extra_includes <- c( sprintf('#include "%s"', basename(pkg_types[pkg_types_exist])), extra_includes ) } extra_includes <- paste0(extra_includes, collapse = "\n") brio::write_lines(path = cpp_path, glue::glue(' // Generated by cpp11: do not edit by hand // clang-format off {extra_includes} #include "cpp11/declarations.hpp" #include {cpp_functions_definitions} extern "C" {{ {call_entries} }} {init$declarations} extern "C" attribute_visible void R_init_{package}(DllInfo* dll){{ 
R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); R_useDynamicSymbols(dll, FALSE);{init$calls} R_forceSymbols(dll, TRUE); }} ', call_entries = glue::glue_collapse(call_entries, "\n") )) if (!quiet) { cli::cli_alert_success("generated file {.file {basename(cpp_path)}}") } invisible(c(r_path, cpp_path)) } utils::globalVariables(c("name", "return_type", "line", "decoration", "context", ".", "functions", "res")) get_registered_functions <- function(decorations, tag, quiet = FALSE) { if (NROW(decorations) == 0) { return(tibble::tibble(file = character(), line = integer(), decoration = character(), params = list(), context = list(), name = character(), return_type = character(), args = list())) } out <- decorations[decorations$decoration == tag, ] out$functions <- lapply(out$context, decor::parse_cpp_function, is_attribute = TRUE) out <- vctrs::vec_cbind(out, vctrs::vec_rbind(!!!out$functions)) out <- out[!(names(out) %in% "functions")] out$decoration <- sub("::[[:alpha:]]+", "", out$decoration) n <- nrow(out) if (!quiet && n > 0) { cli::cli_alert_info(glue::glue("{n} functions decorated with [[{tag}]]")) } out } generate_cpp_functions <- function(funs, package = "cpp11") { funs <- funs[c("name", "return_type", "args", "file", "line", "decoration")] funs$real_params <- vcapply(funs$args, glue_collapse_data, "{type} {name}") funs$sexp_params <- vcapply(funs$args, glue_collapse_data, "SEXP {name}") funs$calls <- mapply(wrap_call, funs$name, funs$return_type, funs$args, SIMPLIFY = TRUE) funs$package <- package out <- glue::glue_data(funs, ' // {basename(file)} {return_type} {name}({real_params}); extern "C" SEXP _{package}_{name}({sexp_params}) {{ BEGIN_CPP11 {calls} END_CPP11 }} ' ) out <- glue::glue_collapse(out, sep = "\n") unclass(out) } generate_init_functions <- function(funs) { if (nrow(funs) == 0) { return(list(declarations = "", calls = "")) } funs <- funs[c("name", "return_type", "args", "file", "line", "decoration")] funs$declaration_params <- 
vcapply(funs$args, glue_collapse_data, "{type} {name}") funs$call_params <- vcapply(funs$args, `[[`, "name") declarations <- glue::glue_data(funs, ' {return_type} {name}({declaration_params}); ' ) declarations <- paste0("\n", glue::glue_collapse(declarations, "\n"), "\n") calls <- glue::glue_data(funs, ' {name}({call_params}); ' ) calls <- paste0("\n", glue::glue_collapse(calls, "\n")); list( declarations = declarations, calls = calls ) } generate_r_functions <- function(funs, package = "cpp11", use_package = FALSE) { funs <- funs[c("name", "return_type", "args")] if (use_package) { package_call <- glue::glue(', PACKAGE = "{package}"') package_names <- glue::glue_data(funs, '"_{package}_{name}"') } else { package_names <- glue::glue_data(funs, '`_{package}_{name}`') package_call <- "" } funs$package <- package funs$package_call <- package_call funs$list_params <- vcapply(funs$args, glue_collapse_data, "{name}") funs$params <- vcapply(funs$list_params, function(x) if (nzchar(x)) paste0(", ", x) else x) is_void <- funs$return_type == "void" funs$calls <- ifelse(is_void, glue::glue_data(funs, 'invisible(.Call({package_names}{params}{package_call}))'), glue::glue_data(funs, '.Call({package_names}{params}{package_call})') ) out <- glue::glue_data(funs, ' {name} <- function({list_params}) {{ {calls} }} ') out <- glue::glue_collapse(out, sep = "\n\n") unclass(out) } wrap_call <- function(name, return_type, args) { call <- glue::glue('{name}({list_params})', list_params = glue_collapse_data(args, "cpp11::as_cpp>({name})")) if (return_type == "void") { unclass(glue::glue(" {call};\n return R_NilValue;", .trim = FALSE)) } else { unclass(glue::glue(" return cpp11::as_sexp({call});")) } } get_call_entries <- function(path, names, package) { con <- textConnection("res", local = TRUE, open = "w") withr::with_collate("C", tools::package_native_routine_registration_skeleton(path, con, character_only = FALSE, include_declarations = TRUE ) ) close(con) start <- grep("/* .Call calls 
*/", res, fixed = TRUE) end <- grep("};", res, fixed = TRUE) if (length(start) == 0) { return("") } redundant <- glue::glue_collapse(glue::glue('extern SEXP _{package}_{names}'), sep = '|') if (length(redundant) > 0) { redundant <- paste0("^", redundant) res <- res[!grepl(redundant, res)] } end <- grep("};", res, fixed = TRUE) call_calls <- startsWith(res, "extern SEXP") if(any(call_calls)) { return(res[seq(start, end)]) } mid <- grep("static const R_CallMethodDef CallEntries[] = {", res, fixed = TRUE) res[seq(mid, end)] } pkg_links_to_rcpp <- function(path) { deps <- desc::desc_get_deps(file.path(path, "DESCRIPTION")) any(deps$type == "LinkingTo" & deps$package == "Rcpp") } get_cpp_register_needs <- function() { res <- read.dcf(system.file("DESCRIPTION", package = "cpp11"))[, "Config/Needs/cpp11/cpp_register"] strsplit(res, "[[:space:]]*,[[:space:]]*")[[1]] } check_valid_attributes <- function(decorations, file = decorations$file) { bad_decor <- startsWith(decorations$decoration, "cpp11::") & (!decorations$decoration %in% c("cpp11::register", "cpp11::init", "cpp11::linking_to")) if(any(bad_decor)) { lines <- decorations$line[bad_decor] names <- decorations$decoration[bad_decor] bad_lines <- glue::glue_collapse(glue::glue("- Invalid attribute `{names}` on line {lines} in file '{file}'."), "\n") msg <- glue::glue("cpp11 attributes must be one of `cpp11::register`, `cpp11::init` or `cpp11::linking_to`: {bad_lines} ") stop(msg, call. = FALSE) } } cpp11/R/zzz.R0000644000175000017500000000107314140524100012501 0ustar nileshnilesh# From https://github.com/r-lib/vctrs/blob/a518ead0b08be29beea287d11e17edc1017e16da/R/zzz.R#L3 on_package_load <- function(pkg, expr) { if (isNamespaceLoaded(pkg)) { expr } else { thunk <- function(...) expr setHook(packageEvent(pkg, "onLoad"), thunk) } } # We need to set the cpp11 knitr engine when cpp11 is loaded. 
.onLoad <- function(libname, pkgname) { on_package_load("knitr", { knitr::knit_engines$set(cpp11 = eng_cpp11) }) } release_bullets <- function() { c( '`Sys.setenv("CPP11_EVAL" = "true"); devtools::submit_cran()`' ) } cpp11/R/vendor.R0000644000175000017500000000373514071114152013155 0ustar nileshnilesh#' Vendor the cpp11 dependency #' #' Vendoring is the act of making your own copy of the 3rd party packages your #' project is using. It is often used in the Go language community. #' #' This function vendors cpp11 into your package by copying the cpp11 #' headers into the `inst/include` folder of your package and adding #' 'cpp11 version: XYZ' to the top of the files, where XYZ is the version of #' cpp11 currently installed on your machine. #' #' If you choose to vendor the headers you should _remove_ `LinkingTo: #' cpp11` from your DESCRIPTION. #' #' **Note**: vendoring places the responsibility of updating the code on #' **you**. Bugfixes and new features in cpp11 will not be available for your #' code until you run `cpp_vendor()` again. #' #' @inheritParams cpp_register #' @return The file path to the vendored code (invisibly). #' @export #' @examples #' # create a new directory #' dir <- tempfile() #' dir.create(dir) #' #' # vendor the cpp11 headers into the directory #' cpp_vendor(dir) #' #' list.files(file.path(dir, "inst", "include", "cpp11")) #' #' # cleanup #' unlink(dir, recursive = TRUE) cpp_vendor <- function(path = ".") { new <- file.path(path, "inst", "include", "cpp11") if (dir.exists(new)) { stop("'", new, "' already exists\n * run unlink('", new, "', recursive = TRUE)", call. = FALSE) } dir.create(new , recursive = TRUE, showWarnings = FALSE) current <- system.file("include", "cpp11", package = "cpp11") if (!nzchar(current)) { stop("cpp11 is not installed", call. 
= FALSE) } cpp11_version <- utils::packageVersion("cpp11") cpp11_header <- sprintf("// cpp11 version: %s\n// vendored on: %s", cpp11_version, Sys.Date()) files <- list.files(current, full.names = TRUE) writeLines( c(cpp11_header, readLines(system.file("include", "cpp11.hpp", package = "cpp11"))), file.path(dirname(new), "cpp11.hpp") ) for (f in files) { writeLines(c(cpp11_header, readLines(f)), file.path(new, basename(f))) } invisible(new) } cpp11/R/knitr.R0000644000175000017500000000065314071114152013003 0ustar nileshnilesheng_cpp11 <- function(options) { if (options$eval) { cpp_source( code = options$code, env = knitr::knit_global(), clean = options$clean %||% TRUE, quiet = options$quiet %||% FALSE, cxx_std = options$cxx_std %||% Sys.getenv("CXX_STD", "CXX11") ) } # Change the engine to cpp so that code formatting works options$engine <- "cpp" knitr::engine_output(options, options$code, "") } cpp11/R/cpp11-package.R0000644000175000017500000000004114071114141014156 0ustar nileshnilesh#' @keywords internal "_PACKAGE" cpp11/R/utils.R0000644000175000017500000000342414071114152013013 0ustar nileshnileshcli_suppress <- function(expr) { withCallingHandlers( expr, cli_message = function(c) { invokeRestart("cli_message_handled") } ) } glue_collapse_data <- function(data, ..., sep = ", ", last = "") { res <- glue::glue_collapse(glue::glue_data(data, ...), sep = sep, last = last) if (length(res) == 0) { return("") } unclass(res) } `%||%` <- function(x, y) if (is.null(x)) y else x viapply <- function(x, f, ...) vapply(x, f, integer(1), ...) vcapply <- function(x, f, ...) vapply(x, f, character(1), ...) 
stop_unless_installed <- function(pkgs) { has_pkg <- logical(length(pkgs)) for (i in seq_along(pkgs)) { has_pkg[[i]] <- requireNamespace(pkgs[[i]], quietly = TRUE) } if (any(!has_pkg)) { msg <- sprintf( "The %s package(s) are required for this functionality", paste(pkgs[!has_pkg], collapse = ", ") ) if (is_interactive()) { ans <- readline(paste(c(msg, "Would you like to install them? (Y/N) "), collapse = "\n")) if (tolower(ans) == "y") { utils::install.packages(pkgs[!has_pkg]) stop_unless_installed(pkgs) return() } } stop(msg, call. = FALSE) } } is_windows <- function() { .Platform$OS.type == "windows" } # This is basically the same as rlang::is_interactive(), which we can't really # use for stop_if_not_installed(), because rlang itself could be one of the # input pkgs. is_interactive <- function() { opt <- getOption("rlang_interactive", NULL) if (!is.null(opt)) { return(opt) } if (isTRUE(getOption("knitr.in.progress"))) { return(FALSE) } if (isTRUE(getOption("rstudio.notebook.executing"))) { return(FALSE) } if (identical(Sys.getenv("TESTTHAT"), "true")) { return(FALSE) } interactive() } cpp11/LICENSE0000644000175000017500000000004514071114141012326 0ustar nileshnileshYEAR: 2020 COPYRIGHT HOLDER: RStudio cpp11/inst/0000755000175000017500000000000014151206456012311 5ustar nileshnileshcpp11/inst/doc/0000755000175000017500000000000014151206456013056 5ustar nileshnileshcpp11/inst/doc/converting.R0000644000175000017500000000301114151206373015350 0ustar nileshnilesh## ---- include = FALSE--------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) should_run_benchmarks <- function(x) { get("requireNamespace")("cpp11test", quietly = TRUE) && asNamespace("cpp11test")$should_run_benchmarks() } ## ---- message = FALSE, eval = should_run_benchmarks()------------------------- # library(cpp11test) # grid <- expand.grid(len = 10 ^ (0:7), pkg = "cpp11", stringsAsFactors = FALSE) # grid <- rbind( # grid, # expand.grid(len = 10 
^ (0:4), pkg = "rcpp", stringsAsFactors = FALSE) # ) # b_grow <- bench::press(.grid = grid, # { # fun = match.fun(sprintf("%sgrow_", ifelse(pkg == "cpp11", "", paste0(pkg, "_")))) # bench::mark( # fun(len) # ) # } # )[c("len", "pkg", "min", "mem_alloc", "n_itr", "n_gc")] # saveRDS(b_grow, "growth.Rds", version = 2) ## ---- echo = FALSE, dev = "svg", fig.ext = "svg", eval = capabilities("cairo")---- b_grow <- readRDS("growth.Rds") library(ggplot2) ggplot(b_grow, aes(x = len, y = min, color = pkg)) + geom_point() + geom_line() + bench::scale_y_bench_time() + scale_x_log10( breaks = scales::trans_breaks("log10", function(x) 10^x), labels = scales::trans_format("log10", scales::math_format(10^.x)) ) + coord_fixed() + theme(panel.grid.minor = element_blank()) + labs(title = "log-log plot of vector size vs construction time", x = NULL, y = NULL) ## ---- echo = FALSE------------------------------------------------------------ knitr::kable(b_grow) cpp11/inst/doc/converting.Rmd0000644000175000017500000002324214120423440015671 0ustar nileshnilesh--- title: "Converting from Rcpp" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Converting from Rcpp} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) should_run_benchmarks <- function(x) { get("requireNamespace")("cpp11test", quietly = TRUE) && asNamespace("cpp11test")$should_run_benchmarks() } ``` In many cases there is no need to convert a package from Rcpp. If the code is already written and you don't have a very compelling need to use cpp11 I would recommend you continue to use Rcpp. However if you _do_ feel like your project will benefit from using cpp11 this vignette will provide some guidance and doing the conversion. It is also a place to highlight some of the largest differences between Rcpp and cpp11. 
## Class comparison table | Rcpp | cpp11 (read-only) | cpp11 (writable) | cpp11 header | | --- | --- | --- | --- | | NumericVector | doubles | writable::doubles | `<cpp11/doubles.hpp>` | | IntegerVector | integers | writable::integers | `<cpp11/integers.hpp>` | | CharacterVector | strings | writable::strings | `<cpp11/strings.hpp>` | | RawVector | raws | writable::raws | `<cpp11/raws.hpp>` | | List | list | writable::list | `<cpp11/list.hpp>` | | RObject | sexp | | `<cpp11/sexp.hpp>` | | XPtr | | external_pointer | `<cpp11/external_pointer.hpp>` | | Environment | | environment | `<cpp11/environment.hpp>` | | Function | | function | `<cpp11/function.hpp>` | | Environment (namespace) | | package | `<cpp11/function.hpp>` | | wrap | | as_sexp | `<cpp11/as.hpp>` | | as | | as_cpp | `<cpp11/as.hpp>` | | stop | stop | | `<cpp11/protect.hpp>` | | checkUserInterrupt | check_user_interrupt | | `<cpp11/protect.hpp>` | ## Incomplete list of Rcpp features not included in cpp11 - None of [Modules](https://CRAN.R-project.org/package=Rcpp/vignettes/Rcpp-modules.pdf) - None of [Sugar](https://CRAN.R-project.org/package=Rcpp/vignettes/Rcpp-sugar.pdf) - Some parts of [Attributes](https://CRAN.R-project.org/package=Rcpp/vignettes/Rcpp-attributes.pdf) - No dependencies - No random number generator restoration - No support for roxygen2 comments - No interfaces ## Read-only vs writable vectors The largest difference between cpp11 and Rcpp classes is that Rcpp classes modify their data in place, whereas cpp11 classes require copying the data to a writable class for modification. The default classes, e.g. `cpp11::doubles`, are *read-only* classes that do not permit modification. If you want to modify the data you need to use the classes in the `cpp11::writable` namespace, e.g. `cpp11::writable::doubles`. In addition, use the `writable` variants if you need to create a new R vector entirely in C++. ## Fewer implicit conversions Rcpp also allows very flexible implicit conversions, e.g. if you pass a `REALSXP` to a function that takes a `Rcpp::IntegerVector()` it is implicitly converted to an `INTSXP`. These conversions are nice for usability, but require (implicit) duplication of the data, with the associated runtime costs. cpp11 throws an error in these cases.
If you want the implicit coercions you can add a call to `as.integer()` or `as.double()` as appropriate from R when you call the function. ## Calling R functions from C++ Calling R functions from C++ is similar to using Rcpp. ```c++ Rcpp::Function as_tibble("as_tibble", Rcpp::Environment::namespace_env("tibble")); as_tibble(x, Rcpp::Named(".rows", num_rows), Rcpp::Named(".name_repair", name_repair)); ``` ```c++ using namespace cpp11::literals; // so we can use ""_nm syntax auto as_tibble = cpp11::package("tibble")["as_tibble"]; as_tibble(x, ".rows"_nm = num_rows, ".name_repair"_nm = name_repair); ``` ## Appending behavior One major difference between Rcpp and cpp11 is how vectors are grown. Rcpp vectors have a `push_back()` method, but unlike `std::vector()` no additional space is reserved when pushing. This makes calling `push_back()` repeatedly very expensive, as the entire vector has to be copied each call. In contrast `cpp11` vectors grow efficiently, reserving extra space. Because of this you can do ~10,000,000 vector appends with cpp11 in approximately the same amount of time that Rcpp does 10,000, as this benchmark demonstrates.
```{r, message = FALSE, eval = should_run_benchmarks()} library(cpp11test) grid <- expand.grid(len = 10 ^ (0:7), pkg = "cpp11", stringsAsFactors = FALSE) grid <- rbind( grid, expand.grid(len = 10 ^ (0:4), pkg = "rcpp", stringsAsFactors = FALSE) ) b_grow <- bench::press(.grid = grid, { fun = match.fun(sprintf("%sgrow_", ifelse(pkg == "cpp11", "", paste0(pkg, "_")))) bench::mark( fun(len) ) } )[c("len", "pkg", "min", "mem_alloc", "n_itr", "n_gc")] saveRDS(b_grow, "growth.Rds", version = 2) ``` ```{r, echo = FALSE, dev = "svg", fig.ext = "svg", eval = capabilities("cairo")} b_grow <- readRDS("growth.Rds") library(ggplot2) ggplot(b_grow, aes(x = len, y = min, color = pkg)) + geom_point() + geom_line() + bench::scale_y_bench_time() + scale_x_log10( breaks = scales::trans_breaks("log10", function(x) 10^x), labels = scales::trans_format("log10", scales::math_format(10^.x)) ) + coord_fixed() + theme(panel.grid.minor = element_blank()) + labs(title = "log-log plot of vector size vs construction time", x = NULL, y = NULL) ``` ```{r, echo = FALSE} knitr::kable(b_grow) ``` ## Random Number behavior Rcpp unconditionally includes calls to `GetRNGstate()` and `PutRNGstate()` around each wrapped function. This ensures that if any C++ code calls the R API functions `unif_rand()`, `norm_rand()`, `exp_rand()` or `R_unif_index()` the random seed state is set accordingly. cpp11 does _not_ do this, so you must include the calls to `GetRNGstate()` and `PutRNGstate()` _yourself_ if you use any of those functions in your C++ code. See [R-exts 6.3 - Random number generation](https://cran.r-project.org/doc/manuals/r-release/R-exts.html#Random-numbers) for details on these functions. One convenient way to do this safely is to use a simple class: ```cpp class local_rng { public: local_rng() { GetRNGstate(); } ~local_rng(){ PutRNGstate(); } }; void foo() { local_rng rng_state; /* my code using the RNG */ } ``` ## Mechanics of converting a package from Rcpp 1. Add cpp11 to `LinkingTo` 1.
Add C++11 to `SystemRequirements` 1. Convert all instances of `// [[Rcpp::export]]` to `[[cpp11::register]]` 1. Clean and recompile the package, e.g. `pkgbuild::clean_dll()` and `pkgload::load_all()` 1. Run tests `devtools::test()` 1. Start converting function by function - Remember you can usually inter-convert between cpp11 and Rcpp classes by going through `SEXP` if needed. - Converting the code a bit at a time (and regularly running your tests) is the best way to do the conversion correctly and make progress. - Doing a separate commit after converting each file (or possibly each function) can make finding any regressions with [git bisect](https://youtu.be/KKeucpfAuuA) much easier in the future. ## Common issues when converting ### STL includes Rcpp.h includes a number of STL headers automatically, notably `<string>` and `<vector>`, however the cpp11 headers generally do not. If you have errors like > error: no type named 'string' in namespace 'std' You will need to include the appropriate STL header, in this case `<string>`. ### R API includes cpp11 conflicts with macros declared by some R headers unless the macros `R_NO_REMAP` and `STRICT_R_HEADERS` are defined. If you include `cpp11/R.hpp` before any R headers these macros will be defined appropriately, otherwise you may see errors like > R headers were included before cpp11 headers and at least one of R_NO_REMAP or STRICT_R_HEADERS was not defined. This indicates that you must either change your include order or add preprocessor definitions for `R_NO_REMAP` and `STRICT_R_HEADERS`. Note that transitive includes of R headers (for example, those included by `Rcpp.h`) can also introduce the conflicting macros. ### Type aliases If you use typedefs for cpp11 types or define custom types you will need to define them in a `pkgname_types.hpp` file so that `cpp_register()` can include it in the generated code.
### `cpp11::stop()` and `cpp11::warning()` with `std::string` `cpp11::stop()` and `cpp11::warning()` are thin wrappers around `Rf_error()` and `Rf_warning()`. These are simple C functions with a `printf()` API, so do not understand C++ objects like `std::string`. Therefore you need to call `obj.c_str()` when passing character data to them. ### Logical vector construction If you are constructing a length 1 logical vector you may need to explicitly use a `r_bool()` object in the initializer list rather than `TRUE`, `FALSE` or `NA_INTEGER`. This issue only occurs with the clang compiler, not gcc. When constructing vectors with more than one element this is not an issue. ```cpp // bad cpp11::writable::logicals({FALSE}); // good cpp11::writable::logicals({r_bool(FALSE)}); // good cpp11::writable::logicals({FALSE, NA_LOGICAL}); ``` cpp11/inst/doc/internals.Rmd0000644000175000017500000002061214140020620015503 0ustar nileshnilesh--- title: "cpp11 internals" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{cpp11 internals} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ``` The development repository for cpp11 is <https://github.com/r-lib/cpp11>. ## Initial setup and dev workflow First install any dependencies needed for development. ```r install.packages("remotes") remotes::install_deps(dependencies = TRUE) ``` You can load the package in an interactive R session ```r devtools::load_all() ``` Or run the tests with ```r devtools::test() ``` `test()` will also re-compile the package if needed, so you do not always have to run `load_all()`. If you change the cpp11 headers you will need to clean and recompile the cpp11test package ```r devtools::clean_dll() devtools::load_all() ``` Generally when developing the C++ headers I run R with its working directory in the `cpp11test` directory and use `devtools::test()` to run the cpp11tests.
To calculate code coverage of the cpp11 package run the following from the `cpp11` root directory. ```r covr::report(cpp11_coverage()) ``` ## Code formatting This project uses [clang-format](https://clang.llvm.org/docs/ClangFormat.html) (version 10) to automatically format the C++ code. You can run `make format` to re-format all code in the project. If your system does not have `clang-format` version 10, this can be installed using a [homebrew tap](https://github.com/r-lib/homebrew-taps) at the command line with `brew install r-lib/taps/clang-format@10`. You may need to link the newly installed version 10. To do so, run `brew unlink clang-format` followed by `brew link clang-format@10`. Alternatively many IDEs support automatically running `clang-format` every time files are written. ## Code organization cpp11 is a header only library, so all source code exposed to users lives in [inst/include](https://github.com/r-lib/cpp11/tree/main/inst/include). R code used to register functions and for `cpp11::cpp_source()` is in [R/](https://github.com/r-lib/cpp11/tree/main/R). Tests for _only_ the code in `R/` are in [tests/testthat/](https://github.com/r-lib/cpp11/tree/main/tests/testthat). The rest of the code is in a separate [cpp11test/](https://github.com/r-lib/cpp11/tree/main/cpp11test) package included in the source tree. Inside [cpp11test/src](https://github.com/r-lib/cpp11/tree/main/cpp11test/src) the files that start with `test-` are C++ tests using the [Catch](https://testthat.r-lib.org/reference/use_catch.html) support in testthat. In addition there are some regular R tests in [cpp11test/tests/testthat/](https://github.com/r-lib/cpp11/tree/main/cpp11test/tests/testthat). ## Naming conventions - All header files are named with a `.hpp` extension. - All source files are named with a `.cpp` extension. - Public header files should be put in `inst/include/cpp11` - Read only r_vector classes and free functions should be put in the `cpp11` namespace.
- Writable r_vector class should be put in the `cpp11::writable` namespace. - Private classes and functions should be put in the `cpp11::internal` namespace. ## Vector classes All of the basic r_vector classes are class templates, the base template is defined in [cpp11/r_vector.hpp](https://github.com/r-lib/cpp11/blob/main/inst/include/cpp11/r_vector.hpp) The template parameter is the type of **value** the particular R vector stores, e.g. `double` for `cpp11::doubles`. This differs from Rcpp, whose first template parameter is the R vector type, e.g. `REALSXP`. The file first has the class declarations, then function definitions further down in the file. Specializations for the various types are in separate files, e.g. [cpp11/doubles.hpp](https://github.com/r-lib/cpp11/blob/main/inst/include/cpp11/doubles.hpp), [cpp11/integers.hpp](https://github.com/r-lib/cpp11/blob/main/inst/include/cpp11/integers.hpp) ## Coercion functions There are two different coercion functions `as_sexp()` takes a C++ object and coerces it to a SEXP object, so it can be used in R. `as_cpp<>()` is a template function that takes a SEXP and creates a C++ object from it The various methods for both functions are defined in [cpp11/as.hpp](https://github.com/r-lib/cpp11/blob/main/inst/include/cpp11/as.hpp) This is definitely the most complex part of the cpp11 code, with extensive use of [template metaprogramming](https://en.wikipedia.org/wiki/Template_metaprogramming). In particular the [substitution failure is not an error (SFINAE)](https://en.wikipedia.org/wiki/Substitution_failure_is_not_an_error) technique is used to control overloading of the functions. If we could use C++20 a lot of this code would be made simpler with [Concepts](https://en.cppreference.com/w/cpp/language/constraints), but alas. The most common C++ types are included in the test suite and should work without issues, as more exotic types are used in real projects additional issues may arise. 
Some useful links on SFINAE - https://www.fluentcpp.com/2018/05/15/make-sfinae-pretty-1-what-value-sfinae-brings-to-code/, https://www.fluentcpp.com/2018/05/18/make-sfinae-pretty-2-hidden-beauty-sfinae/ ## Protection ### Protect list cpp11 uses an idea proposed by [Luke Tierney](https://github.com/RcppCore/Rcpp/issues/1081#issuecomment-630330838) to use a doubly linked list with the head preserved to protect objects cpp11 is protecting. Each node in the list uses the head (`CAR`) part to point to the previous node, and the `CDR` part to point to the next node. The `TAG` is used to point to the object being protected. The head and tail of the list have `R_NilValue` as their `CAR` and `CDR` pointers respectively. Calling `preserved.insert()` with a regular R object will add a new node to the list and return a protect token corresponding to the node added. Calling `preserved.release()` on this returned token will release the protection by unlinking the node from the linked list. This scheme scales in O(1) time to release or insert an object vs O(N) or worse time with `R_PreserveObject()` / `R_ReleaseObject()`. These functions are defined in [protect.hpp](https://github.com/r-lib/cpp11/blob/main/inst/include/cpp11/protect.hpp) ### Unwind Protect In R 3.5+ cpp11 uses `R_UnwindProtect` to protect (most) calls to the R API that could fail. These are usually those that allocate memory, though in truth most R API functions could error along some paths. If an error happens under `R_UnwindProtect` cpp11 will throw a C++ exception. This exception is caught by the try catch block defined in the `BEGIN_CPP11` macro in [cpp11/declarations.hpp](https://github.com/r-lib/cpp11/blob/main/inst/include/cpp11/declarations.hpp). The exception will cause any C++ destructors to run, freeing any resources held by C++ objects. After the try catch block exits the R error unwinding is then continued by `R_ContinueUnwind()` and a normal R error results.
In R versions prior to 3.5 `R_UnwindProtect()` is not available. Unfortunately the options to emulate it are not ideal. 1. Using `R_TopLevelExec()` works to avoid the C long jump, but because the code is always run in a top level context any errors or messages thrown cannot be caught by `tryCatch()` or similar techniques. 2. Using `R_TryCatch()` is not available prior to R 3.4, and also has a serious bug in R 3.4 (fixed in R 3.5). 3. Calling the R level `tryCatch()` function which contains an expression that runs a C function which then runs the C++ code would be an option, but implementing this is convoluted and it would impact performance, perhaps severely. 4. Have `cpp11::unwind_protect()` be a no-op for these versions. This means any resources held by C++ objects would leak, including cpp11::r_vector / cpp11::sexp objects. None of these options is perfect, here are some pros and cons for each. 1. Causes behavior changes and test failures, so it was ruled out. 2. Was also ruled out since we want to support back to R 3.3. 3. Was ruled out partially because the implementation would be somewhat tricky and more because performance would suffer greatly. 4. is what we now do in cpp11. It leaks protected objects when there are R API errors. If packages are concerned about the leaked memory they can call `cpp11::preserved.release_all()` as needed to release the current protections for all objects managed by cpp11. This is not done automatically because in some cases the protections should persist beyond the `.Call()` boundary, e.g. in vroom altrep objects. cpp11/inst/doc/motivations.html0000644000175000017500000067115714151206456016341 0ustar nileshnilesh Motivations for cpp11

Motivations for cpp11

Motivations

R and S have a long history of interacting with compiled languages. In fact the original version of S written in the late 1970s was mainly a wrapper around FORTRAN routines. (History-of-S) Released in 2000, the cxx package was an early prototype of C++ bindings to R. Rcpp was first published to CRAN in 2008, and Rcpp11 in 2014. Of these Rcpp has by far the widest adoption, with over 2000 reverse dependencies as of 2020.

Rcpp has been a widely successful project, however over the years a number of issues and additional C++ features have arisen. Adding these features to Rcpp would require a great deal of work, or in some cases would be impossible without severely breaking backwards compatibility.

cpp11 is a ground up rewrite of C++ bindings to R with different design trade-offs and features.

Changes that motivated cpp11 include:

Copy-on-write semantics

R uses copy-on-write (also called copy-on-modify) semantics. Let's say you have two variables x and y that both point to the same underlying data.

x <- c(1, 2, 3)
y <- x

If you modify y, R will first copy the values of x to a new position, then point y to the new location and only after the copy modify y. This allows x to retain the original values.

y[[3]] <- 4
y
#> [1] 1 2 4

x
#> [1] 1 2 3

C++ does not have copy-on-write built into the language; however, it has two related concepts: copy-by-value and copy-by-reference. Copy-by-value works similarly to R, except that R only copies when something is changed, whereas C++ always copies.

int x = 42;
int y = x;
y = 0;
// x is still == 42

Copy-by-reference does the opposite, both x and y always point to the same underlying value. In C++ you specify a reference with &.

int x = 42;
int &y = x;
y = 0;
// both x and y are now 0

Copy-by-reference is a valuable technique, as it avoids the overhead of copying the data. However it can also lead to errors when internal functions change their inputs unexpectedly. Rcpp uses copy-by-reference by default (even if you pass a Rcpp vector class by value). This gives Rcpp functions completely different semantics from normal R functions.

We can illustrate this by creating an Rcpp function that multiplies its input vector by 2.

#include "Rcpp.h"
using namespace Rcpp;

// [[Rcpp::export]]
NumericVector times_two_rcpp(NumericVector x) {
  for (int i = 0; i < x.size(); ++i) {
    x[i] = x[i] * 2;
  }
  return x;
}

If you do this with regular R functions, you will see the value of y is x * 2, but the value of x is unchanged.

x <- c(1, 2, 3)
y <- x * 2
y
#> [1] 2 4 6

x
#> [1] 1 2 3

However if we now call our times_two_rcpp() function we get the right output value, but now x is also changed.

z <- times_two_rcpp(x)
z
#> [1] 2 4 6

x
#> [1] 2 4 6

cpp11 strives to make its functions behave similarly to normal R functions, while preserving the speed of Rcpp when read only access is needed. Each of the r_vector classes in cpp11 has a normal read only version that uses copy-by-reference, and a writable version which uses copy-by-value.

#include "cpp11/doubles.hpp"

[[cpp11::register]]
cpp11::doubles times_two_cpp11(cpp11::writable::doubles x) {
  for (int i = 0; i < x.size(); ++i) {
    x[i] = x[i] * 2;
  }
  return x;
}

Using cpp11::writable::doubles first copies the input vector, so when we do the multiplication we do not modify the original data.

x <- c(1, 2, 3)

z <- times_two_cpp11(x)
z
#> [1] 2 4 6

x
#> [1] 1 2 3

Improve safety

Internally R is written in C, not C++. In general C and C++ work well together; a large part of C++’s success is due to its high interoperability with C code. However one area in which C and C++ are generally not interoperable is error handling. In C++ the most common way to handle errors is with exceptions.

Exceptions provide a clean, safe way for objects to obtain and cleanup resources automatically even when errors occur.

C safety

The C language does not have support for exceptions, so error handling is done in a variety of ways. These include error codes like errno, conditional statements, and in the R codebase the longjmp function.

longjmp, which stands for ‘long jump’ is a function that allows you to transfer the control flow of a program to another location elsewhere in the program. R uses long jumps extensively in its error handling routines. If an R function is executing and an error occurs, a long jump is called which ‘jumps’ the control flow into the error handling code.

Crucially long jumps are incompatible with C++ destructors. If a long jump occurs the destructors of any active C++ objects are not run, and therefore any resources (such as memory, file handles, etc.) managed by those objects will be leaked.

For example, the following unsafe code would leak the memory allocated in the C++ std::vector x when the R API function Rf_allocVector() fails (since you can’t create a vector of -1 size).

std::vector<double> x({1., 2., 3.});

SEXP y = PROTECT(Rf_allocVector(REALSXP, -1));

cpp11 provides two mechanisms to make interfacing with R’s C API and C++ code safer. cpp11::unwind_protect() takes a functional object (a C++11 lambda function or std::function) and converts any C long jumps encountered to C++ exceptions. Now instead of a C long jump happening when the Rf_allocVector() call fails, a C++ exception occurs, which does trigger the std::vector destructor, so that memory is automatically released.

std::vector<double> x({1., 2., 3.});

SEXP y;
unwind_protect([]() {
  y = Rf_allocVector(REALSXP, -1);
})

cpp11::safe() is a more concise way to wrap a particular R API function with unwind_protect().

std::vector<double> x({1., 2., 3.});

SEXP y = PROTECT(safe[Rf_allocVector](REALSXP, -1));

Again using cpp11::safe() converts the C long jump to a C++ exception, so the memory is automatically released.

cpp11 uses these mechanisms extensively internally when calling the R C API, which makes cpp11 much safer against resource leaks than using Rcpp or calling R’s C API by hand.

C++ safety

In the inverse of C safety we also need to ensure that C++ exceptions do not reach the C call stack, as they will terminate R if that occurs. Like Rcpp, cpp11 automatically generates try / catch guards around registered functions to prevent this and also converts C++ exceptions into normal R errors. This is done without developer facing code changes.

With both C and C++ sides of the coin covered we can safely use R’s C API and C++ code together with C++ objects without leaking resources.

Altrep support

ALTREP, which stands for ALTernative REPresentations, is a feature introduced in R 3.5. ALTREP allows R internals and package authors to define alternative ways of representing data to R. One example of the use of altrep is the : operator.

Prior to R 3.5 : generated a full vector for the entire sequence. e.g. 1:1000 would require 1000 individual values. As of R 3.5 this sequence is instead represented by an ALTREP vector, so none of the values actually exist in memory. Instead each time R accesses a particular value in the sequence that value is computed on-the-fly. This saves memory and execution time, and allows users to use sequences which would otherwise be too big to fit in memory.

1:1e9
#>  [1]  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20
#>  [ reached getOption("max.print") -- omitted 999999980 entries ]

Because Rcpp predates the introduction of ALTREP, it does not support the interfaces needed to access ALTREP objects. This means the objects must be converted to normal R objects as soon as they are used by Rcpp.

#include "Rcpp.h"

// [[Rcpp::export]]
Rcpp::IntegerVector identity_rcpp(Rcpp::IntegerVector x) {
  return x;
}
x <- identity_rcpp(1:100000)
lobstr::obj_size(x)
#> 400,728 B

Whereas cpp11 objects preserve the ALTREP object.

#include "cpp11/integers.hpp"

[[cpp11::register]]
cpp11::integers identity_cpp11(cpp11::integers x) {
  return x;
}
y <- identity_cpp11(1:100000)
lobstr::obj_size(y)
#> 680 B

Altrep benchmarks

In these benchmarks note that Rcpp allocates memory for the ALTREP vectors. This is because Rcpp implicitly converts them into normal R vectors. cpp11 retains them as ALTREP vectors, so no additional memory is needed.

foreach and accumulate both use iterators that take advantage of REAL_GET_REGION to buffer queries. This makes them faster than naive C-style for loops with ALTREP vectors.

The for2 case shows an optimization you can use if you know at compile-time that you won’t be dealing with ALTREP vectors. By specifying false for the second argument (is_altrep), you can disable the ALTREP support. This causes the ALTREP conditional code to be compiled out resulting in loop unrolling (and speeds) identical to that generated by Rcpp.

library(cpp11test)

cases <- expand.grid(
  len = 3e6,
  vector = c("normal", "altrep"),
  method = c("for", "foreach", "accumulate"),
  pkg = c("cpp11", "rcpp"),
  stringsAsFactors = FALSE
)

# Add special case
cases <- rbind(list(len = 3e6, vector = "normal", method = "for2", pkg = "cpp11"), cases)

b_sum <- bench::press(
  .grid = cases,
  {
    seq_real <- function(x) as.numeric(seq_len(x))
    funs <- c("normal" = rnorm, "altrep" = seq_real)
    x <- funs[[vector]](len)
    fun <- match.fun(sprintf("%ssum_dbl_%s_", ifelse(pkg == "cpp11", "", paste0(pkg, "_")), method))
    bench::mark(
      fun(x)
    )
  }
)[c("pkg", "method", "vector", "min", "median", "mem_alloc", "itr/sec", "n_gc")]

saveRDS(b_sum, "sum.Rds", version = 2)
knitr::kable(readRDS("sum.Rds"))
pkg method vector min median mem_alloc itr/sec n_gc
cpp11 for2 normal 3.01ms 3.21ms 0B 302.9364 0
cpp11 for normal 2.93ms 3.09ms 0B 319.9100 0
cpp11 for altrep 8.09ms 8.44ms 0B 117.0562 0
cpp11 foreach normal 2.97ms 3.36ms 0B 292.8306 0
cpp11 foreach altrep 4.02ms 4.18ms 0B 236.2339 0
cpp11 accumulate normal 3.03ms 3.24ms 0B 303.3408 0
cpp11 accumulate altrep 4.07ms 4.31ms 0B 225.8066 0
rcpp for normal 2.81ms 3.13ms 0B 311.3724 0
rcpp for altrep 2.81ms 3.13ms 22.9MB 311.6365 0
rcpp foreach normal 2.93ms 3.46ms 0B 293.9831 0
rcpp foreach altrep 2.81ms 3.07ms 22.9MB 313.6250 0
rcpp accumulate normal 2.8ms 3.01ms 0B 321.6647 0
rcpp accumulate altrep 2.75ms 3ms 22.9MB 322.9292 0

cpp11test/src/sum.cpp contains the code run in these benchmarks.

UTF-8 everywhere

R has complicated support for Unicode strings and non-ASCII code pages, whose behavior often differs substantially on different operating systems, particularly Windows. Correctly dealing with this is challenging and often feels like whack a mole.

To combat this complexity cpp11 uses the UTF-8 everywhere philosophy. This means that whenever text data is converted from R data structures to C++ data structures by cpp11 the data is translated into UTF-8. Conversely any text data coming from C++ code is assumed to be UTF-8 and marked as such for R. Doing this universally avoids many locale specific issues when dealing with Unicode text.

Concretely cpp11 always uses Rf_translateCharUTF8() when obtaining const char* from CHRSXP objects and uses Rf_mkCharCE(, CE_UTF8) when creating new CHRSXP objects from const char* inputs.

C++11 features

C++11 provides a host of new features to the C++ language. cpp11 uses a number of these including

Simpler implementation

Rcpp is very ambitious, with a number of advanced features, including modules, sugar and extensive support for attributes. While these are useful features, many R packages do not use one or any of these advanced features. In addition the code needed to support these features is complex and can be challenging to maintain.

cpp11 takes a more limited scope, providing only the set of r_vector wrappers for R vector types, coercion methods to and from C++ and the limited attributes necessary to support use in R packages.

This limited scope allows the implementation to be much simpler: the headers in Rcpp 1.0.4 have 74,658 lines of code (excluding blank or commented lines) in 379 files. Some headers in Rcpp are automatically generated, removing these still gives you 25,249 lines of code in 357 files. In contrast the headers in cpp11 contain only 1,734 lines of code in 19 files.

This reduction in complexity should make cpp11 an easier project to maintain and ensure correctness, particularly around interactions with the R garbage collector.

Compilation speed

Rcpp always bundles all of its headers together, which causes slow compilation times and high peak memory usage when compiling. The headers in cpp11 are more easily decoupled, so you can include only the particular headers you actually use in a source file. This can significantly improve the compilation speed and memory usage to compile your package.

Here are some real examples of the reduction in compile time and peak memory usage after converting packages to cpp11.

package Rcpp compile time cpp11 compile time Rcpp peak memory cpp11 peak memory Rcpp commit cpp11 commit
haven 17.42s 7.13s 428MB 204MB a3cf75a4 978cb034
readr 124.13s 81.08s 969MB 684MB ec0d8989 aa89ff72
roxygen2 17.34s 4.24s 371MB 109MB 6f081b75 e8e1e22d
tidyr 14.25s 3.34s 363MB 83MB 3899ed51 60f7c7d4

Header only

Rcpp has long been a mostly header only library, however it is not a completely header only library. There have been cases when a package was first installed with version X of Rcpp, and then a newer version of Rcpp was later installed. Then when the original package was loaded R would crash, because the Application Binary Interface of Rcpp had changed between the two versions.

Because cpp11 consists of exclusively headers this issue does not occur.

Vendoring

In the Go community the concept of vendoring is widespread. Vendoring means that you copy the code for the dependencies into your project’s source tree. This ensures the dependency code is fixed and stable until it is updated. Because cpp11 is fully header only you can vendor the code in the same way. cpp11::cpp_vendor() is provided to do this if you choose.

Vendoring has advantages and drawbacks however. The advantage is that changes to the cpp11 project could never break your existing code. The drawbacks are both minor, your package size is now slightly larger, and major, you no longer get bugfixes and new features until you explicitly update cpp11.

I think the majority of packages should use LinkingTo: cpp11 and not vendor the cpp11 dependency. However, vendoring can be appropriate for certain situations.

Protection

cpp11 uses a custom doubly linked list data structure to track objects it is managing. This structure is much more efficient for large numbers of objects than using R_PreserveObject() / R_ReleaseObject() as is done in Rcpp.

library(cpp11test)
grid <- expand.grid(len = c(10 ^ (2:5), 2e5), pkg = c("cpp11", "rcpp"), stringsAsFactors = FALSE)
b_release <- bench::press(.grid = grid,
  {
    fun = match.fun(sprintf("%s_release_", pkg))
    bench::mark(
      fun(len),
      iterations = 1
    )
  }
)[c("len", "pkg", "min")]
saveRDS(b_release, "release.Rds", version = 2)

This plot shows the average time to protect and release a given object is essentially constant for cpp11, whereas for Rcpp it is linear or worse in the number of objects being tracked.

len pkg min
1e+02 cpp11 26.28µs
1e+03 cpp11 127.51µs
1e+04 cpp11 1.36ms
1e+05 cpp11 14.89ms
2e+05 cpp11 35.62ms
1e+02 rcpp 6.7ms
1e+03 rcpp 1.62ms
1e+04 rcpp 340.38ms
1e+05 rcpp 24.79s
2e+05 rcpp 1.81m

Growing vectors

One major difference between Rcpp and cpp11 is how vectors are grown. Rcpp vectors have a push_back() method, but unlike std::vector no additional space is reserved when pushing. This makes calling push_back() repeatedly very expensive, as the entire vector has to be copied each call.

In contrast cpp11 vectors grow efficiently, reserving extra space. Because of this you can do ~10,000,000 vector appends with cpp11 in approximately the same amount of time that Rcpp does 10,000, as this benchmark demonstrates.

grid <- expand.grid(len = 10 ^ (0:7), pkg = "cpp11", stringsAsFactors = FALSE)
grid <- rbind(
  grid,
  expand.grid(len = 10 ^ (0:4), pkg = "rcpp", stringsAsFactors = FALSE)
)
b_grow <- bench::press(.grid = grid,
  {
    fun = match.fun(sprintf("%sgrow_", ifelse(pkg == "cpp11", "", paste0(pkg, "_"))))
    bench::mark(
      fun(len)
    )
  }
)[c("len", "pkg", "min", "mem_alloc", "n_itr", "n_gc")]
saveRDS(b_grow, "growth.Rds", version = 2)

len pkg min mem_alloc n_itr n_gc
1e+00 cpp11 3.3µs 0B 10000 0
1e+01 cpp11 6.05µs 0B 9999 1
1e+02 cpp11 8.49µs 1.89KB 10000 0
1e+03 cpp11 14.18µs 16.03KB 9999 1
1e+04 cpp11 63.77µs 256.22KB 3477 2
1e+05 cpp11 443.32µs 2MB 404 5
1e+06 cpp11 3.99ms 16MB 70 3
1e+07 cpp11 105.51ms 256MB 1 5
1e+00 rcpp 2.64µs 0B 10000 0
1e+01 rcpp 3.13µs 0B 9999 1
1e+02 rcpp 13.87µs 42.33KB 9997 3
1e+03 rcpp 440.77µs 3.86MB 319 1
1e+04 rcpp 54.13ms 381.96MB 2 2

Conclusion

Rcpp has been and will continue to be widely successful. cpp11 is an alternative implementation of C++ bindings to R that chooses different design trade-offs and features. Both packages can co-exist (even be used in the same package!) and continue to enrich the R community.

cpp11/inst/doc/FAQ.html0000644000175000017500000011764314151206373014365 0ustar nileshnilesh FAQ

FAQ

Below are some Frequently Asked Questions about cpp11. If you have a question that you think would fit well here please open an issue.

1. What are the underlying types of cpp11 objects?

vector element
cpp11::integers int
cpp11::doubles double
cpp11::logicals cpp11::r_bool
cpp11::strings cpp11::r_string
cpp11::raws uint8_t
cpp11::list SEXP

2. How do I add elements to a named list?

Use the push_back() method with the named literal syntax. The named literal syntax is defined in the cpp11::literals namespace.

#include <cpp11.hpp>

[[cpp11::register]]
cpp11::list foo_push() {
  using namespace cpp11::literals;

  cpp11::writable::list x;
  x.push_back({"foo"_nm = 1});

  return x;
}

3. Does cpp11 support default arguments?

cpp11 does not support default arguments. While convenient, they would require more complexity to support than is currently worthwhile. If you need default argument support you can use a wrapper function around your cpp11 registered function. A common convention is to name the internal function with a trailing _.

#include <cpp11.hpp>
[[cpp11::register]]
double add_some_(double x, double amount) {
  return x + amount;
}
add_some <- function(x, amount = 1) {
  add_some_(x, amount)
}
add_some(1)
#> [1] 2
add_some(1, amount = 5)
#> [1] 6

4. How do I create a new empty list?

Define a new writable list object.

cpp11::writable::list x;

5. How do I retrieve (named) elements from a named vector/list?

Use the [] accessor function.

x["foo"]

6. How can I tell whether a vector is named?

Use the named() method for vector classes.

#include <cpp11.hpp>

[[cpp11::register]]
bool is_named(cpp11::strings x) {
  return x.named();
}
is_named("foo")
#> [1] FALSE

is_named(c(x = "foo"))
#> [1] TRUE

7. How do I return a cpp11::writable::logicals object with only a FALSE value?

You need to use list initialization with {} to create the object.

#include <cpp11.hpp>

[[cpp11::register]]
cpp11::writable::logicals my_false() {
  return {FALSE};
}

[[cpp11::register]]
cpp11::writable::logicals my_true() {
  return {TRUE};
}

[[cpp11::register]]
cpp11::writable::logicals my_both() {
  return {TRUE, FALSE, TRUE};
}
my_false()
#> [1] FALSE

my_true()
#> [1] TRUE

my_both()
#> [1]  TRUE FALSE  TRUE

8. How do I create a new empty environment?

To do this you need to call the base::new.env() function from C++. This can be done by creating a cpp11::function object and then calling it to generate the new environment.

#include <cpp11.hpp>

[[cpp11::register]]
cpp11::environment create_environment() {
  cpp11::function new_env(cpp11::package("base")["new.env"]);
  return new_env();
}

9. How do I assign and retrieve values in an environment? What happens if I try to get a value that doesn’t exist?

Use [] to retrieve or assign values from an environment by name. If a value does not exist it will return R_UnboundValue.

#include <cpp11.hpp>

[[cpp11::register]]
bool foo_exists(cpp11::environment x) {
  return x["foo"] != R_UnboundValue;
}

[[cpp11::register]]
void set_foo(cpp11::environment x, double value) {
  x["foo"] = value;
}
x <- new.env()

foo_exists(x)
#> [1] FALSE

set_foo(x, 1)

foo_exists(x)
#> [1] TRUE

10. How can I create a cpp11::raws from a std::string?

There is no built in way to do this. One method would be to push_back() each element of the string individually.

#include <cpp11.hpp>

[[cpp11::register]]
cpp11::raws push_raws() {
  std::string x("hi");
  cpp11::writable::raws out;

  for (auto c : x) {
    out.push_back(c);
  }

  return out;
}
push_raws()
#> [1] 68 69

11. How can I create a std::string from a cpp11::writable::strings?

Because C++ does not allow for two implicit casts, explicitly cast to cpp11::r_string first.

#include <cpp11.hpp>
#include <string>

[[cpp11::register]]
std::string my_string() {
  cpp11::writable::strings x({"foo", "bar"});
  std::string elt = cpp11::r_string(x[0]);
  return elt;
}

12. What are the types for C++ iterators?

The iterators are ::iterator classes contained inside the vector classes. For example the iterator for cpp11::doubles would be cpp11::doubles::iterator and the iterator for cpp11::writable::doubles would be cpp11::writable::doubles::iterator.

13. My code has using namespace std, why do I still have to include std:: in the signatures of [[cpp11::register]] functions?

The using namespace std directive will not be included in the generated code of the function signatures, so they still need to be fully qualified. However you will not need to qualify the type names within those functions.

The following won’t compile

#include <cpp11.hpp>
#include <string>

using namespace std;

[[cpp11::register]]
string foobar() {
  return string("foo") + "-bar";
}

But this will compile and work as intended

#include <cpp11.hpp>
#include <string>

using namespace std;

[[cpp11::register]]
std::string foobar() {
  return string("foo") + "-bar";
}

14. How do I modify a vector in place?

In place modification breaks the normal semantics of R code. In general it should be avoided, which is why cpp11::writable classes always copy their data when constructed.

However if you are positive in-place modification is necessary for your use case you can use the move constructor to do this.

#include <cpp11.hpp>

[[cpp11::register]]
void add_one(cpp11::sexp x_sexp) {
  cpp11::writable::integers x(std::move(x_sexp.data()));
  for (auto&& value : x) {
    ++value;
  }
}
x <- c(1L, 2L, 3L, 4L)
.Internal(inspect(x))
#> @7fbb980cbb88 13 INTSXP g0c2 [REF(2)] (len=4, tl=0) 1,2,3,4
add_one(x)
.Internal(inspect(x))
#> @7fbb980cbb88 13 INTSXP g0c2 [REF(6)] (len=4, tl=0) 2,3,4,5
x
#> [1] 2 3 4 5
cpp11/inst/doc/internals.html0000644000175000017500000005426314151206443015751 0ustar nileshnilesh cpp11 internals

cpp11 internals

The development repository for cpp11 is https://github.com/r-lib/cpp11.

Initial setup and dev workflow

First install any dependencies needed for development.

install.packages("remotes")
remotes::install_deps(dependencies = TRUE)

You can load the package in an interactive R session

devtools::load_all()

Or run the tests with

devtools::test()

test() will also re-compile the package if needed, so you do not always have to run load_all().

If you change the cpp11 headers you will need to clean and recompile the cpp11test package

devtools::clean_dll()
devtools::load_all()

Generally when developing the C++ headers I run R with its working directory in the cpp11test directory and use devtools::test() to run the cpp11tests.

To calculate code coverage of the cpp11 package run the following from the cpp11 root directory.

covr::report(cpp11_coverage())

Code formatting

This project uses clang-format (version 10) to automatically format the C++ code.

You can run make format to re-format all code in the project. If your system does not have clang-format version 10, this can be installed using a homebrew tap at the command line with brew install r-lib/taps/clang-format@10.

You may need to link the newly installed version 10. To do so, run brew unlink clang-format followed by brew link clang-format@10.

Alternatively many IDEs support automatically running clang-format every time files are written.

Code organization

cpp11 is a header only library, so all source code exposed to users lives in inst/include. R code used to register functions and for cpp11::cpp_source() is in R/. Tests for only the code in R/ are in tests/testthat/. The rest of the code is in a separate cpp11test/ package included in the source tree. Inside cpp11test/src the files that start with test- are C++ tests using the Catch support in testthat. In addition there are some regular R tests in cpp11test/tests/testthat/.

Naming conventions

  • All header files are named with a .hpp extension.
  • All source files are named with a .cpp extension.
  • Public header files should be put in inst/include/cpp11
  • Read only r_vector classes and free functions should be put in the cpp11 namespace.
  • Writable r_vector class should be put in the cpp11::writable namespace.
  • Private classes and functions should be put in the cpp11::internal namespace.

Vector classes

All of the basic r_vector classes are class templates, the base template is defined in cpp11/r_vector.hpp. The template parameter is the type of value the particular R vector stores, e.g. double for cpp11::doubles. This differs from Rcpp, whose first template parameter is the R vector type, e.g. REALSXP.

The file first has the class declarations, then function definitions further down in the file. Specializations for the various types are in separate files, e.g. cpp11/doubles.hpp, cpp11/integers.hpp.

Coercion functions

There are two different coercion functions:

as_sexp() takes a C++ object and coerces it to a SEXP object, so it can be used in R. as_cpp<>() is a template function that takes a SEXP and creates a C++ object from it.

The various methods for both functions are defined in cpp11/as.hpp.

This is definitely the most complex part of the cpp11 code, with extensive use of template metaprogramming. In particular the substitution failure is not an error (SFINAE) technique is used to control overloading of the functions. If we could use C++20 a lot of this code would be made simpler with Concepts, but alas.

The most common C++ types are included in the test suite and should work without issues; as more exotic types are used in real projects, additional issues may arise.

Some useful links on SFINAE

Protection

Protect list

cpp11 uses an idea proposed by Luke Tierney to use a doubly linked list with the head preserved to protect objects cpp11 is protecting.

Each node in the list uses the head (CAR) part to point to the previous node, and the CDR part to point to the next node. The TAG is used to point to the object being protected. The head and tail of the list have R_NilValue as their CAR and CDR pointers respectively.

Calling preserved.insert() with a regular R object will add a new node to the list and return a protect token corresponding to the node added. Calling preserved.release() on this returned token will release the protection by unlinking the node from the linked list.

This scheme scales in O(1) time to release or insert an object vs O(N) or worse time with R_PreserveObject() / R_ReleaseObject().

These functions are defined in protect.hpp

Unwind Protect

In R 3.5+ cpp11 uses R_UnwindProtect to protect (most) calls to the R API that could fail. These are usually those that allocate memory, though in truth most R API functions could error along some paths. If an error happens under R_UnwindProtect, cpp11 will throw a C++ exception. This exception is caught by the try/catch block defined in the BEGIN_CPP11 macro in cpp11/declarations.hpp. The exception will cause any C++ destructors to run, freeing any resources held by C++ objects. After the try/catch block exits, the R error unwinding is then continued by R_ContinueUnwind() and a normal R error results.

In R versions prior to 3.5 R_UnwindProtect() is not available. Unfortunately the options to emulate it are not ideal.

  1. Using R_TopLevelExec() works to avoid the C long jump, but because the code is always run in a top level context any errors or messages thrown cannot be caught by tryCatch() or similar techniques.
  2. Using R_TryCatch() is not available prior to R 3.4, and also has a serious bug in R 3.4 (fixed in R 3.5).
  3. Calling the R level tryCatch() function which contains an expression that runs a C function which then runs the C++ code would be an option, but implementing this is convoluted and it would impact performance, perhaps severely.
  4. Have cpp11::unwind_protect() be a no-op for these versions. This means any resources held by C++ objects would leak, including cpp11::r_vector / cpp11::sexp objects.

None of these options is perfect; here are some pros and cons for each.

  1. Causes behavior changes and test failures, so it was ruled out.
  2. Was also ruled out since we want to support back to R 3.3.
  3. Was ruled out partially because the implementation would be somewhat tricky and more because performance would suffer greatly.
  4. is what we now do in cpp11. It leaks protected objects when there are R API errors.

If packages are concerned about the leaked memory they can call cpp11::preserved.release_all() as needed to release the current protections for all objects managed by cpp11. This is not done automatically because in some cases the protections should persist beyond the .Call() boundary, e.g. in vroom altrep objects.

cpp11/inst/doc/cpp11.R0000644000175000017500000001322014151206443014117 0ustar nileshnilesh## ---- include = FALSE--------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>", eval = as.logical(Sys.getenv("CPP11_EVAL", "false")) ) ## ----setup-------------------------------------------------------------------- library(cpp11) ## ----add---------------------------------------------------------------------- cpp_function('int add(int x, int y, int z) { int sum = x + y + z; return sum; }') # add works like a regular R function add add(1, 2, 3) ## ----one-r-------------------------------------------------------------------- one <- function() 1L ## ----one-cpp------------------------------------------------------------------ cpp_function('int one() { return 1; }') ## ----sign--------------------------------------------------------------------- sign_r <- function(x) { if (x > 0) { 1 } else if (x == 0) { 0 } else { -1 } } cpp_function('int sign_cpp(int x) { if (x > 0) { return 1; } else if (x == 0) { return 0; } else { return -1; } }') ## ----sum-r-------------------------------------------------------------------- sum_r <- function(x) { total <- 0 for (i in seq_along(x)) { total <- total + x[i] } total } ## ----sum-cpp------------------------------------------------------------------ cpp_function('double sum_cpp(doubles x) { int n = x.size(); double total = 0; for(int i = 0; i < n; ++i) { total += x[i]; } return total; }') ## ----sum-bench---------------------------------------------------------------- x <- runif(1e3) bench::mark( sum(x), sum_cpp(x), sum_r(x) )[1:6] ## ----pdist-r------------------------------------------------------------------ pdist_r <- function(x, ys) { sqrt((x - ys) ^ 2) } ## ----pdist-cpp---------------------------------------------------------------- cpp_function('doubles pdist_cpp(double x, doubles ys) { int n = ys.size(); writable::doubles out(n); for(int i = 0; i < n; ++i) { out[i] = 
sqrt(pow(ys[i] - x, 2.0)); } return out; }') ## ----------------------------------------------------------------------------- y <- runif(1e6) bench::mark( pdist_r(0.5, y), pdist_cpp(0.5, y) )[1:6] ## ---- include = FALSE--------------------------------------------------------- # 5e-3 * x == 2e-3 * x + 10 * 60 600 / (5e-3 - 2e-3) ## ----------------------------------------------------------------------------- mod <- lm(mpg ~ wt, data = mtcars) mpe(mod) ## ----------------------------------------------------------------------------- call_with_one(function(x) x + 1) call_with_one(paste) ## ----------------------------------------------------------------------------- str(scalar_missings()) ## ----------------------------------------------------------------------------- cpp_eval("NAN == 1") cpp_eval("NAN < 1") cpp_eval("NAN > 1") cpp_eval("NAN == NAN") ## ----------------------------------------------------------------------------- cpp_eval("NAN && TRUE") cpp_eval("NAN || FALSE") ## ----------------------------------------------------------------------------- cpp_eval("NAN + 1") cpp_eval("NAN - 1") cpp_eval("NAN / 1") cpp_eval("NAN * 1") ## ----------------------------------------------------------------------------- str(missing_sampler()) ## ---- include = FALSE, error = FALSE------------------------------------------ # Verify that our sum implementations work local({ x <- c(.5, .1, .3, .7, 12.) 
stopifnot(identical(sum(x), sum2(x))) stopifnot(identical(sum(x), sum3(x))) stopifnot(identical(sum(x), sum4(x))) }) ## ---- include = FALSE, error = FALSE------------------------------------------ # Verify that our findInterval2 implementation works local({ n <- 1e3 x <- sort(round(stats::rt(n, df = 2), 2)) tt <- c(-n, seq(-2, 2, length = n + 1), n) stopifnot(identical(findInterval(tt, x), findInterval2(tt, x))) }) ## ----------------------------------------------------------------------------- gibbs_r <- function(N, thin) { mat <- matrix(nrow = N, ncol = 2) x <- y <- 0 for (i in 1:N) { for (j in 1:thin) { x <- rgamma(1, 3, y * y + 4) y <- rnorm(1, 1 / (x + 1), 1 / sqrt(2 * (x + 1))) } mat[i, ] <- c(x, y) } mat } ## ----------------------------------------------------------------------------- bench::mark( r = { set.seed(42) gibbs_r(100, 10) }, cpp = { set.seed(42) gibbs_cpp(100, 10) }, check = TRUE, relative = TRUE ) ## ----------------------------------------------------------------------------- vacc1a <- function(age, female, ily) { p <- 0.25 + 0.3 * 1 / (1 - exp(0.04 * age)) + 0.1 * ily p <- p * if (female) 1.25 else 0.75 p <- max(0, p) p <- min(1, p) p } ## ----------------------------------------------------------------------------- vacc1 <- function(age, female, ily) { n <- length(age) out <- numeric(n) for (i in seq_len(n)) { out[i] <- vacc1a(age[i], female[i], ily[i]) } out } ## ----------------------------------------------------------------------------- vacc2 <- function(age, female, ily) { p <- 0.25 + 0.3 * 1 / (1 - exp(0.04 * age)) + 0.1 * ily p <- p * ifelse(female, 1.25, 0.75) p <- pmax(0, p) p <- pmin(1, p) p } ## ----------------------------------------------------------------------------- n <- 1000 age <- rnorm(n, mean = 50, sd = 10) female <- sample(c(T, F), n, rep = TRUE) ily <- sample(c(T, F), n, prob = c(0.8, 0.2), rep = TRUE) stopifnot( all.equal(vacc1(age, female, ily), vacc2(age, female, ily)), all.equal(vacc1(age, female, ily), vacc3(age, 
female, ily)) ) ## ----------------------------------------------------------------------------- bench::mark( vacc1 = vacc1(age, female, ily), vacc2 = vacc2(age, female, ily), vacc3 = vacc3(age, female, ily) ) cpp11/inst/doc/FAQ.R0000644000175000017500000000171614151206372013612 0ustar nileshnilesh## ---- include = FALSE--------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) library(cpp11) ## ----------------------------------------------------------------------------- add_some <- function(x, amount = 1) { add_some_(x, amount) } add_some(1) add_some(1, amount = 5) ## ----------------------------------------------------------------------------- is_named("foo") is_named(c(x = "foo")) ## ----------------------------------------------------------------------------- my_false() my_true() my_both() ## ----------------------------------------------------------------------------- x <- new.env() foo_exists(x) set_foo(x, 1) foo_exists(x) ## ----------------------------------------------------------------------------- push_raws() ## ----------------------------------------------------------------------------- x <- c(1L, 2L, 3L, 4L) .Internal(inspect(x)) add_one(x) .Internal(inspect(x)) x cpp11/inst/doc/internals.R0000644000175000017500000000021714151206443015174 0ustar nileshnilesh## ---- include = FALSE--------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) cpp11/inst/doc/cpp11.html0000644000175000017500000040720414151206443014673 0ustar nileshnilesh Get started with cpp11

Get started with cpp11

This content is adapted (with permission) from the Rcpp chapter of Hadley Wickham’s book Advanced R.

Introduction

Sometimes R code just isn’t fast enough. You’ve used profiling to figure out where your bottlenecks are, and you’ve done everything you can in R, but your code still isn’t fast enough. In this vignette you’ll learn how to improve performance by rewriting key functions in C++. This magic comes by way of the cpp11 package.

cpp11 makes it very simple to connect C++ to R. While it is possible to write C or Fortran code for use in R, it will be painful by comparison. cpp11 provides a clean, approachable API that lets you write high-performance code, insulated from R’s more complex C API.

Typical bottlenecks that C++ can address include:

  • Loops that can’t be easily vectorised because subsequent iterations depend on previous ones.

  • Recursive functions, or problems which involve calling functions millions of times. The overhead of calling a function in C++ is much lower than in R.

  • Problems that require advanced data structures and algorithms that R doesn’t provide. Through the standard template library (STL), C++ has efficient implementations of many important data structures, from ordered maps to double-ended queues.

The aim of this vignette is to discuss only those aspects of C++ and cpp11 that are absolutely necessary to help you eliminate bottlenecks in your code. We won’t spend much time on advanced features like object-oriented programming or templates because the focus is on writing small, self-contained functions, not big programs. A working knowledge of C++ is helpful, but not essential. Many good tutorials and references are freely available, including https://www.learncpp.com/ and https://en.cppreference.com/w/cpp. For more advanced topics, the Effective C++ series by Scott Meyers is a popular choice.

Outline

  • Section intro teaches you how to write C++ by converting simple R functions to their C++ equivalents. You’ll learn how C++ differs from R, and what the key scalar, vector, and matrix classes are called.

  • Section cpp_source shows you how to use cpp11::cpp_source() to load a C++ file from disk in the same way you use source() to load a file of R code.

  • Section classes discusses how to modify attributes from cpp11, and mentions some of the other important classes.

  • Section na teaches you how to work with R’s missing values in C++.

  • Section stl shows you how to use some of the most important data structures and algorithms from the standard template library, or STL, built-in to C++.

  • Section case-studies shows two real case studies where cpp11 was used to get considerable performance improvements.

  • Section package teaches you how to add C++ code to an R package.

  • Section more concludes the vignette with pointers to more resources to help you learn cpp11 and C++.

Prerequisites

We’ll use cpp11 to call C++ from R:

library(cpp11)

You’ll also need a working C++ compiler. To get it:

  • On Windows, install Rtools.
  • On Mac, install Xcode from the app store.
  • On Linux, sudo apt-get install r-base-dev or similar.

Getting started with C++

cpp_function() allows you to write C++ functions in R:

# Compile a C++ function that adds three ints and expose it to R as add().
cpp_function('int add(int x, int y, int z) {
  int sum = x + y + z;
  return sum;
}')
# add works like a regular R function
add
#> function (x, y, z) 
#> {
#>     .Call("_code_994c55afa444_add", x, y, z, PACKAGE = "code_994c55afa444")
#> }
add(1, 2, 3)
#> [1] 6

When you run the above code, cpp11 will compile the C++ code and construct an R function that connects to the compiled C++ function. There’s a lot going on underneath the hood but cpp11 takes care of all the details so you don’t need to worry about them.

The following sections will teach you the basics by translating simple R functions to their C++ equivalents. We’ll start simple with a function that has no inputs and a scalar output, and then make it progressively more complicated:

  • Scalar input and scalar output
  • Vector input and scalar output
  • Vector input and vector output
  • Matrix input and vector output

No inputs, scalar output

Let’s start with a very simple function. It has no arguments and always returns the integer 1:

one <- function() 1L

The equivalent C++ function is:

// Takes no arguments and always returns the integer 1.
int one() { return 1; }

We can compile and use this from R with cpp_function()

# Compile the C++ one() and make it callable from R.
cpp_function('int one() {
  return 1;
}')

This small function illustrates a number of important differences between R and C++:

  • The syntax to create a function looks like the syntax to call a function; you don’t use assignment to create functions as you do in R.

  • You must declare the type of output the function returns. This function returns an int (a scalar integer). The classes for the most common types of R vectors are: doubles, integers, strings, and logicals.

  • Scalars and vectors are different. The scalar equivalents of numeric, integer, character, and logical vectors are: double, int, String, and bool.

  • You must use an explicit return statement to return a value from a function.

  • Every statement is terminated by a ;.

Scalar input, scalar output

The next example function implements a scalar version of the sign() function which returns 1 if the input is positive, 0 if it’s zero, and -1 if it’s negative:

# Scalar sign: 1 when x is greater than 0, 0 when x equals 0, and -1 otherwise.
sign_r <- function(x) {
  if (x > 0) {
    1
  } else if (x == 0) {
    0
  } else {
    -1
  }
}
# C++ counterpart of sign_r(): same branches, with int input and int output.
cpp_function('int sign_cpp(int x) {
  if (x > 0) {
    return 1;
  } else if (x == 0) {
    return 0;
  } else {
    return -1;
  }
}')

In the C++ version:

  • We declare the type of each input in the same way we declare the type of the output. While this makes the code a little more verbose, it also makes clear the type of input the function needs.

  • The if syntax is identical — while there are some big differences between R and C++, there are also lots of similarities! C++ also has a while statement that works the same way as R’s. As in R you can use break to exit the loop, but to skip one iteration you need to use continue instead of next.

Vector input, scalar output

One big difference between R and C++ is that the cost of loops is much lower in C++. For example, we could implement the sum function in R using a loop. If you’ve been programming in R a while, you’ll probably have a visceral reaction to this function!

# Sums the elements of x with an explicit loop, starting from a total of 0.
sum_r <- function(x) {
  total <- 0
  for (i in seq_along(x)) {
    total <- total + x[i]
  }
  total
}

In C++, loops have very little overhead, so it’s fine to use them. In Section stl, you’ll see alternatives to for loops that more clearly express your intent; they’re not faster, but they can make your code easier to understand.

# C++ counterpart of sum_r(): loops over a doubles vector and returns the total.
cpp_function('double sum_cpp(doubles x) {
  int n = x.size();
  double total = 0;
  for(int i = 0; i < n; ++i) {
    total += x[i];
  }
  return total;
}')

The C++ version is similar, but:

  • To find the length of the vector, we use the .size() method, which returns an integer. C++ methods are called with . (i.e., a full stop).

  • The for statement has a different syntax: for(init; check; increment). This loop is initialised by creating a new variable called i with value 0. Before each iteration we check that i < n, and terminate the loop if it’s not. After each iteration, we increment the value of i by one, using the special prefix operator ++ which increases the value of i by 1.

  • In C++, vector indices start at 0, which means that the last element is at position n - 1. I’ll say this again because it’s so important: IN C++, VECTOR INDICES START AT 0! This is a very common source of bugs when converting R functions to C++.

  • Use = for assignment, not <-.

  • C++ provides operators that modify in-place: total += x[i] is equivalent to total = total + x[i]. Similar in-place operators are -=, *=, and /=.

This is a good example of where C++ is much more efficient than R. As shown by the following microbenchmark, sum_cpp() is competitive with the built-in (and highly optimised) sum(), while sum_r() is several orders of magnitude slower.

x <- runif(1e3)
bench::mark(
  sum(x),
  sum_cpp(x),
  sum_r(x)
)[1:6]
#> # A tibble: 3 × 6
#>   expression      min   median `itr/sec` mem_alloc `gc/sec`
#>   <bch:expr> <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl>
#> 1 sum(x)       2.11µs   2.17µs   405139.        0B      0  
#> 2 sum_cpp(x)   3.18µs   3.43µs   204120.        0B     20.4
#> 3 sum_r(x)    21.53µs  25.03µs    37470.    21.3KB      0

Vector input, vector output

Next we’ll create a function that computes the Euclidean distance between a value and a vector of values:

# Distance between x and each element of ys, computed as sqrt((x - ys)^2).
pdist_r <- function(x, ys) {
  sqrt((x - ys) ^ 2)
}

In R, it’s not obvious that we want x to be a scalar from the function definition, and we’d need to make that clear in the documentation. That’s not a problem in the C++ version because we have to be explicit about types:

# C++ counterpart of pdist_r(): x is a scalar double, ys a doubles vector.
cpp_function('doubles pdist_cpp(double x, doubles ys) {
  int n = ys.size();
  writable::doubles out(n);
  for(int i = 0; i < n; ++i) {
    out[i] = sqrt(pow(ys[i] - x, 2.0));
  }
  return out;
}')

This function introduces a few new concepts:

  • Because we are creating a new vector we need to use writable::doubles rather than the read-only doubles.

  • We create a new numeric vector of length n with a constructor: cpp11::writable::doubles out(n). Another useful way of making a vector is to copy an existing one: cpp11::doubles zs(ys).

  • C++ uses pow(), not ^, for exponentiation.

Note that because the R version is fully vectorised, it’s already going to be fast.

y <- runif(1e6)
bench::mark(
  pdist_r(0.5, y),
  pdist_cpp(0.5, y)
)[1:6]
#> # A tibble: 2 × 6
#>   expression             min   median `itr/sec` mem_alloc `gc/sec`
#>   <bch:expr>        <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl>
#> 1 pdist_r(0.5, y)     4.14ms   4.58ms      187.    7.63MB     97.6
#> 2 pdist_cpp(0.5, y)   2.56ms   2.64ms      318.    7.63MB    133.

On my computer, it takes around 5 ms with a 1 million element y vector. The C++ function is about 2.5 times faster, ~2 ms, but assuming it took you 10 minutes to write the C++ function, you’d need to run it ~200,000 times to make rewriting worthwhile. The reason why the C++ function is faster is subtle, and relates to memory management. The R version needs to create an intermediate vector the same length as y (x - ys), and allocating memory is an expensive operation. The C++ function avoids this overhead because it uses an intermediate scalar.

Using cpp_source

So far, we’ve used inline C++ with cpp_function(). This makes presentation simpler, but for real problems, it’s usually easier to use stand-alone C++ files and then source them into R using cpp_source(). This lets you take advantage of text editor support for C++ files (e.g., syntax highlighting) as well as making it easier to identify the line numbers in compilation errors.

Your stand-alone C++ file should have extension .cpp, and needs to start with:

#include "cpp11.hpp"
using namespace cpp11;

And for each function that you want available within R, you need to prefix it with:

[[cpp11::register]]

If you’re familiar with roxygen2, you might wonder how this relates to @export. cpp11::register registers a C++ function to be called from R. @export controls whether a function is exported from a package and made available to the user.

To compile the C++ code, use cpp_source("path/to/file.cpp"). This will create the matching R functions and add them to your current session. Note that these functions cannot be saved in a .Rdata file and reloaded in a later session; they must be recreated each time you restart R.

This example also illustrates a different kind of a for loop, a for-each loop.

#include "cpp11/doubles.hpp"
using namespace cpp11;

// Arithmetic mean of `x`: the elements are summed with a for-each loop and
// the sum is divided by the number of elements.
[[cpp11::register]]
double mean_cpp(doubles x) {
  double sum = 0;
  for (double element : x) {
    sum += element;
  }
  int count = x.size();
  return sum / count;
}

NB: if you run this code, you’ll notice that mean_cpp() is faster than the built-in mean(). This is because it trades numerical accuracy for speed.

For the remainder of this vignette C++ code will be presented stand-alone rather than wrapped in a call to cpp_function. If you want to try compiling and/or modifying the examples you should paste them into a C++ source file that includes the elements described above. This is easy to do in RMarkdown by using {cpp11} instead of {r} at the beginning of your code blocks.

Exercises

  1. With the basics of C++ in hand, it’s now a great time to practice by reading and writing some simple C++ functions. For each of the following functions, read the code and figure out what the corresponding base R function is. You might not understand every part of the code yet, but you should be able to figure out the basics of what the function does.
#include "cpp11.hpp"

using namespace cpp11;
namespace writable = cpp11::writable;

// Adds x[i] / n to a running total for every element of x and returns the
// total, where n is the number of elements.
[[cpp11::register]]
double f1(doubles x) {
  const int len = x.size();
  double acc = 0;

  int idx = 0;
  while (idx < len) {
    acc += x[idx] / len;
    ++idx;
  }
  return acc;
}

// Returns a vector the same length as x in which out[0] = x[0] and every
// later element is out[i - 1] + x[i]. An empty input yields an empty result.
[[cpp11::register]]
doubles f2(doubles x) {
  int n = x.size();
  writable::doubles out(n);

  // Guard: with no elements, x[0] and out[0] would be out-of-bounds accesses.
  if (n == 0) {
    return out;
  }

  out[0] = x[0];
  for(int i = 1; i < n; ++i) {
    out[i] = out[i - 1] + x[i];
  }
  return out;
}

// Scans x from the front and returns true as soon as an element tests true;
// returns false when no element does.
[[cpp11::register]]
bool f3(logicals x) {
  const int len = x.size();

  int pos = 0;
  while (pos < len) {
    if (x[pos]) {
      return true;
    }
    ++pos;
  }
  return false;
}

// Applies `pred` to each element of x in order and returns the 1-based
// position of the first element whose result tests true, or 0 if none does.
[[cpp11::register]]
int f4(cpp11::function pred, list x) {
  const int len = x.size();

  int pos = 0;
  while (pos < len) {
    logicals outcome(pred(x[pos]));
    if (outcome[0]) {
      return pos + 1;
    }
    ++pos;
  }
  return 0;
}
  2. To practice your function writing skills, convert the following functions into C++. For now, assume the inputs have no missing values.

    1. all().

    2. cumprod(), cummin(), cummax().

    3. diff(). Start by assuming lag 1, and then generalise for lag n.

    4. range().

    5. var(). Read about the approaches you can take on Wikipedia. Whenever implementing a numerical algorithm, it’s always good to check what is already known about the problem.

Other classes

You’ve already seen the basic vector classes (integers, doubles, logicals, strings) and their scalar (int, double, bool, string) equivalents. cpp11 also provides wrappers for other base data types. The most important are for lists and data frames, functions, and attributes, as described below.

Lists and data frames

cpp11 also provides list and data_frame classes, but they are more useful for output than input. This is because lists and data frames can contain arbitrary classes but C++ needs to know their classes in advance. If the list has known structure (e.g., it’s an S3 object), you can extract the components and manually convert them to their C++ equivalents with as_cpp(). For example, the object created by lm(), the function that fits a linear model, is a list whose components are always of the same type.

The following code illustrates how you might extract the mean percentage error (mpe()) of a linear model. This isn’t a good example of when to use C++, because it’s so easily implemented in R, but it shows how to work with an important S3 class. Note the use of Rf_inherits() and the stop() to check that the object really is a linear model.

#include "cpp11.hpp"
using namespace cpp11;

// Mean percentage error of a linear model: the average over all observations
// of residual / (fitted value + residual).
[[cpp11::register]]
double mpe(list mod) {
  // Reject any input that does not inherit from class "lm".
  if (!Rf_inherits(mod, "lm")) {
    stop("Input must be a linear model");
  }

  doubles resid(mod["residuals"]);
  doubles fitted(mod["fitted.values"]);
  const int count = resid.size();

  double total = 0;
  for (int k = 0; k < count; ++k) {
    const double observed = fitted[k] + resid[k];
    total += resid[k] / observed;
  }
  return total / count;
}
mod <- lm(mpg ~ wt, data = mtcars)
mpe(mod)
#> [1] -0.01541615

Functions

You can put R functions in an object of type function. This makes calling an R function from C++ straightforward. The only challenge is that we don’t know what type of output the function will return, so we use the catchall type sexp. This stands for S-Expression and is used as the type of all R Objects in the internal C code.

#include "cpp11.hpp"
using namespace cpp11;
namespace writable = cpp11::writable;

// Calls the R function `f` with the single argument 1 and returns whatever it
// produces as a generic sexp, since the result type is not known in advance.
[[cpp11::register]]
sexp call_with_one(function f) {
  return f(1);
}
call_with_one(function(x) x + 1)
#> [1] 2
call_with_one(paste)
#> [1] "1"

Calling R functions with positional arguments is obvious:

f("y", 1);

But you need a special syntax for named arguments:

using namespace cpp11::literals;

f("x"_nm = "y", "value"_nm = 1);

Attributes

All R objects have attributes, which can be queried and modified with .attr(). cpp11 also provides .names() as an alias for the names attribute. The following code snippet illustrates these methods. Note the use of {} initializer list syntax. This allows you to create an R vector from C++ scalar values:

#include "cpp11.hpp"
using namespace cpp11;
namespace writable = cpp11::writable;

// Builds a three-element double vector, then sets its names, a custom
// attribute and its class before returning it.
[[cpp11::register]]
doubles attribs() {
  // {} initializer list: creates the R vector from C++ scalar values.
  writable::doubles out = {1., 2., 3.};
  // .names() is an alias for the names attribute.
  out.names() = {"a", "b", "c"};
  out.attr("my-attr") = "my-value";
  out.attr("class") = "my-class";
  return out;
}

Missing values

If you’re working with missing values, you need to know two things:

  • How R’s missing values behave in C++’s scalars (e.g., double).
  • How to get and set missing values in vectors (e.g., doubles).

Scalars

The following code explores what happens when you take one of R’s missing values, coerce it into a scalar, and then coerce back to an R vector. Note that this kind of experimentation is a useful way to figure out what any operation does.

#include "cpp11.hpp"
using namespace cpp11;

// Stores each of R's missing values in a C++ scalar, converts the scalars
// back to R objects with as_sexp() and returns them in a list.
[[cpp11::register]]
list scalar_missings() {
  int int_s = NA_INTEGER;
  r_string chr_s = NA_STRING;
  // bool has only two states; the output shown below reports this one as TRUE.
  bool lgl_s = NA_LOGICAL;
  double num_s = NA_REAL;
  return writable::list({as_sexp(int_s), as_sexp(chr_s), as_sexp(lgl_s), as_sexp(num_s)});
}
str(scalar_missings())
#> List of 4
#>  $ : int NA
#>  $ : chr NA
#>  $ : logi TRUE
#>  $ : num NA

With the exception of bool, things look pretty good here: all of the missing values have been preserved. However, as we’ll see in the following sections, things are not quite as straightforward as they seem.

Integers

With integers, missing values are stored as the smallest integer. If you don’t do anything to them, they’ll be preserved. But, since C++ doesn’t know that the smallest integer has this special behaviour, if you do anything to it you’re likely to get an incorrect value: for example, cpp_eval('NA_INTEGER + 1') gives -2147483647.

So if you want to work with missing values in integers, either use a length 1 integers vector or be very careful with your code.

Doubles

With doubles, you may be able to get away with ignoring missing values and working with NaNs (not a number). This is because R’s NA is a special type of IEEE 754 floating point number NaN. So any logical expression that involves a NaN (or in C++, NAN) always evaluates as FALSE:

cpp_eval("NAN == 1")
#> [1] FALSE
cpp_eval("NAN < 1")
#> [1] FALSE
cpp_eval("NAN > 1")
#> [1] FALSE
cpp_eval("NAN == NAN")
#> [1] FALSE

(Here I’m using cpp_eval() which allows you to see the result of running a single C++ expression, making it excellent for this sort of interactive experimentation.) But be careful when combining them with Boolean values:

cpp_eval("NAN && TRUE")
#> [1] TRUE
cpp_eval("NAN || FALSE")
#> [1] TRUE

However, in numeric contexts NaNs will propagate NAs:

cpp_eval("NAN + 1")
#> [1] NaN
cpp_eval("NAN - 1")
#> [1] NaN
cpp_eval("NAN / 1")
#> [1] NaN
cpp_eval("NAN * 1")
#> [1] NaN

Strings

r_string is a scalar string class introduced by cpp11, so it knows how to deal with missing values.

Boolean

C++’s bool has two possible values (true or false), a logical vector in R has three (TRUE, FALSE, and NA). If you coerce a length 1 logical vector, make sure it doesn’t contain any missing values; otherwise they will be converted to TRUE. One way to fix this is to use int instead, as this can represent TRUE, FALSE, and NA.

Vectors

With vectors, you need to use a missing value specific to the type of vector, NA_REAL, NA_INTEGER, NA_LOGICAL, NA_STRING:

#include "cpp11.hpp"
using namespace cpp11;
namespace writable = cpp11::writable;

// Build a list holding one length-1 vector per basic vector type, each
// containing that type's missing value.
[[cpp11::register]]
list missing_sampler() {
  // Each vector type has its own missing-value constant.
  writable::doubles na_dbl({NA_REAL});
  writable::integers na_int({NA_INTEGER});
  writable::logicals na_lgl({r_bool(NA_LOGICAL)});
  writable::strings na_chr({NA_STRING});

  return writable::list({na_dbl, na_int, na_lgl, na_chr});
}
str(missing_sampler())
#> List of 4
#>  $ : num NA
#>  $ : int NA
#>  $ : logi NA
#>  $ : chr NA

Exercises

  1. Rewrite any of the functions from the first exercise to deal with missing values. If na_rm is true, ignore the missing values. If na_rm is false, return a missing value if the input contains any missing values. Some good functions to practice with are min(), max(), range(), mean(), and var().

  2. Rewrite cumsum() and diff() so they can handle missing values. Note that these functions have slightly more complicated behaviour.

Standard Template Library

The real strength of C++ is revealed when you need to implement more complex algorithms. The standard template library (STL) provides a set of extremely useful data structures and algorithms. This section will explain some of the most important algorithms and data structures and point you in the right direction to learn more. I can’t teach you everything you need to know about the STL, but hopefully the examples will show you the power of the STL, and persuade you that it’s useful to learn more.

If you need an algorithm or data structure that isn’t implemented in STL, one place to look is boost. Installing boost on your computer is beyond the scope of this vignette, but once you have it installed, you can use boost data structures and algorithms by including the appropriate header file with (e.g.) #include <boost/array.hpp>.

Using iterators

Iterators are used extensively in the STL: many functions either accept or return iterators. They are the next step up from basic loops, abstracting away the details of the underlying data structure. Iterators have three main operators:

  1. Advance with ++.
  2. Get the value they refer to, or dereference, with *.
  3. Compare with ==.

For example we could re-write our sum function using iterators:

#include "cpp11.hpp"
using namespace cpp11;

// Sum the elements of `x` by walking an iterator from x.begin() to x.end().
[[cpp11::register]]
double sum2(doubles x) {
  double running = 0;

  // `auto` spares us from spelling out the iterator's type.
  for (auto it = x.begin(); it != x.end(); ++it) {
    running += *it;  // dereference the iterator to read the current element
  }
  return running;
}

The main changes are in the for loop:

  • We start at x.begin() and loop until we get to x.end(). A small optimization is to store the value of the end iterator so we don’t need to look it up each time. This only saves about 2 ns per iteration, so it’s only important when the calculations in the loop are very simple.

  • Instead of indexing into x, we use the dereference operator to get its current value: *it.

  • Notice we use auto rather than giving the type of the iterator.

This code can be simplified still further through the use of a C++11 feature: range-based for loops.

#include "cpp11.hpp"
using namespace cpp11;

// Sum the elements of `xs` using a range-based for loop.
[[cpp11::register]]
double sum3(doubles xs) {
  double running = 0;

  // Each pass of the loop hands us the next element by value.
  for (double value : xs) {
    running += value;
  }
  return running;
}

Iterators also allow us to use the C++ equivalents of the apply family of functions. For example, we could again rewrite sum() to use the accumulate() function, which takes a starting and an ending iterator, and adds up all the values in the vector. The third argument to accumulate gives the initial value: it’s particularly important because this also determines the data type that accumulate uses (so we use 0.0 and not 0 so that accumulate uses a double, not an int.). To use accumulate() we need to include the <numeric> header.

#include <numeric>
#include "cpp11.hpp"
using namespace cpp11;

// Sum the elements of `x` with std::accumulate().
[[cpp11::register]]
double sum4(doubles x) {
  // The type of the initial value is the type accumulate() adds in, so it
  // must be 0.0 (a double) and not 0 (an int).
  const double initial = 0.0;
  return std::accumulate(x.begin(), x.end(), initial);
}

Algorithms

The <algorithm> header provides a large number of algorithms that work with iterators. A good reference is available at https://en.cppreference.com/w/cpp/algorithm. For example, we could write a basic cpp11 version of findInterval() that takes two arguments, a vector of values and a vector of breaks, and locates the bin that each x falls into. This shows off a few more advanced iterator features. Read the code below and see if you can figure out how it works.

#include <algorithm>
#include "cpp11.hpp"
using namespace cpp11;

// For every value in `x`, record the position in `breaks` of the first break
// that is greater than the value, i.e. the number of breaks at or below it.
// std::upper_bound() performs a binary search, so `breaks` is expected to be
// sorted in increasing order.
[[cpp11::register]] integers findInterval2(doubles x, doubles breaks) {
  writable::integers out(x.size());
  auto out_it = out.begin();

  // Advance the input and output iterators in lock step.
  for (auto x_it = x.begin(); x_it != x.end(); ++x_it, ++out_it) {
    auto first_above = std::upper_bound(breaks.begin(), breaks.end(), *x_it);
    // distance() turns the iterator into an offset from the start of breaks.
    *out_it = std::distance(breaks.begin(), first_above);
  }
  return out;
}

The key points are:

  • We step through two iterators (input and output) simultaneously.

  • We can assign into a dereferenced iterator (out_it) to change the values in out.

  • upper_bound() returns an iterator. If we wanted the value of the upper_bound() we could dereference it; to figure out its location, we use the distance() function.

When in doubt, it is generally better to use algorithms from the STL than hand rolled loops. In Effective STL, Scott Meyers gives three reasons: efficiency, correctness, and maintainability. Algorithms from the STL are written by C++ experts to be extremely efficient, and they have been around for a long time so they are well tested. Using standard algorithms also makes the intent of your code more clear, helping to make it more readable and more maintainable.

Data structures

The STL provides a large set of data structures: array, bitset, list, forward_list, map, multimap, multiset, priority_queue, queue, deque, set, stack, unordered_map, unordered_set, unordered_multimap, unordered_multiset, and vector. The most important of these data structures are the vector, the unordered_set, and the unordered_map. We’ll focus on these three in this section, but using the others is similar: they just have different performance trade-offs. For example, the deque (pronounced “deck”) has a very similar interface to vectors but a different underlying implementation that has different performance trade-offs. You may want to try it for your problem. A good reference for STL data structures is https://en.cppreference.com/w/cpp/container — I recommend you keep it open while working with the STL.

cpp11 knows how to convert from many STL data structures to their R equivalents, so you can return them from your functions without explicitly converting to R data structures.

Vectors

An STL vector is very similar to an R vector, except that it grows efficiently. This makes STL vectors appropriate to use when you don’t know in advance how big the output will be. Vectors are templated, which means that you need to specify the type of object the vector will contain when you create it: vector<int>, vector<bool>, vector<double>, vector<string>. You can access individual elements of a vector using the standard [] notation, and you can add a new element to the end of the vector using .push_back(). If you have some idea in advance how big the vector will be, you can use .reserve() to allocate sufficient storage.

The following code implements run length encoding (rle()). It produces two vectors of output: a vector of values, and a vector lengths giving how many times each element is repeated. It works by looping through the input vector x comparing each value to the previous: if it’s the same, then it increments the last value in lengths; if it’s different, it adds the value to the end of values, and sets the corresponding length to 1.

#include "cpp11.hpp"
#include <vector>
using namespace cpp11;
namespace writable = cpp11::writable;

// Run length encoding of `x`.
//
// Returns a list with two elements: "values", the value that starts each run
// of consecutive equal elements, and "lengths", how many elements each run
// contains. A zero-length `x` yields two zero-length vectors.
[[cpp11::register]]
list rle_cpp(doubles x) {
  std::vector<int> lengths;
  std::vector<double> values;

  // Only touch x[0] when it exists: indexing a zero-length vector (and
  // forming x.begin() + 1) would read past the end of the data.
  if (x.size() > 0) {
    // Initialise first value
    int i = 0;  // index of the run currently being extended
    double prev = x[0];
    values.push_back(prev);
    lengths.push_back(1);

    for(auto it = x.begin() + 1; it != x.end(); ++it) {
      if (prev == *it) {
        // Same value as before: the current run gets one element longer.
        lengths[i]++;
      } else {
        // A new value starts a new run of length 1.
        values.push_back(*it);
        lengths.push_back(1);
        i++;
        prev = *it;
      }
    }
  }
  return writable::list({
    "lengths"_nm = lengths,
    "values"_nm = values
  });
}

(An alternative implementation would be to replace i with the iterator lengths.rbegin() which always points to the last element of the vector. You might want to try implementing that.)

Other methods of a vector are described at https://en.cppreference.com/w/cpp/container/vector.

Sets

Sets maintain a unique set of values, and can efficiently tell if you’ve seen a value before. They are useful for problems that involve duplicates or unique values (like unique, duplicated, or in). C++ provides both ordered (std::set) and unordered sets (std::unordered_set), depending on whether or not order matters for you. Unordered sets can sometimes be much faster (because they use a hash table internally rather than a tree). Often even if you need an ordered set, you could consider using an unordered set and then sorting the output. Benchmarking with your expected dataset is the best way to determine which is fastest for your data. Like vectors, sets are templated, so you need to request the appropriate type of set for your purpose: unordered_set<int>, unordered_set<bool>, etc. More details are available at https://en.cppreference.com/w/cpp/container/set and https://en.cppreference.com/w/cpp/container/unordered_set.

The following function uses an unordered set to implement an equivalent to duplicated() for integer vectors. Note the use of seen.insert(x[i]).second. insert() returns a pair, the .first value is an iterator that points to the element and the .second value is a Boolean that’s true if the value was a new addition to the set.

#include <unordered_set>
#include "cpp11.hpp"
using namespace cpp11;
namespace writable = cpp11::writable;

// For each element of `x`, report whether the same value already occurred at
// an earlier position in the vector.
[[cpp11::register]]
logicals duplicated_cpp(integers x) {
  std::unordered_set<int> seen;
  int len = x.size();
  writable::logicals dup_flags(len);
  for (int i = 0; i < len; ++i) {
    // insert() returns a pair whose .second is true only when the value was
    // not in the set yet, so a duplicate is exactly the negation of it.
    bool newly_added = seen.insert(x[i]).second;
    dup_flags[i] = !newly_added;
  }
  return dup_flags;
}

Exercises

To practice using the STL algorithms and data structures, implement the following R functions in C++, using the hints provided:

  1. median.default() using partial_sort.

  2. %in% using unordered_set and the find() or count() methods.

  3. unique() using an unordered_set (challenge: do it in one line!).

  4. min() using std::min(), or max() using std::max().

  5. which.min() using min_element, or which.max() using max_element.

  6. setdiff(), union(), and intersect() for integers using sorted ranges and set_union, set_intersection and set_difference.

Case studies

The following case studies illustrate some real life uses of C++ to replace slow R code.

Gibbs sampler

The following case study updates an example blogged about by Dirk Eddelbuettel, illustrating the conversion of a Gibbs sampler in R to C++. The R and C++ code shown below is very similar (it only took a few minutes to convert the R version to the C++ version), but runs about 30 times faster on my computer. Dirk’s blog post also shows another way to make it even faster: using the faster random number generator functions in GSL (easily accessible from R through the RcppGSL package) can make it another two to three times faster.

The R code is as follows:

# Gibbs sampler returning an N x 2 matrix of (x, y) draws.
#
# N:    number of rows (retained draws) to produce.
# thin: number of x/y updates performed before each retained draw.
#
# The chain starts from x = y = 0; each update draws x from a gamma
# distribution that depends on y, then y from a normal distribution that
# depends on the new x.
gibbs_r <- function(N, thin) {
  mat <- matrix(nrow = N, ncol = 2)
  x <- y <- 0
  # seq_len() gives zero iterations when its argument is 0, whereas 1:0 would
  # count down through 1 and 0 and run the loop body twice.
  for (i in seq_len(N)) {
    for (j in seq_len(thin)) {
      x <- rgamma(1, 3, y * y + 4)
      y <- rnorm(1, 1 / (x + 1), 1 / sqrt(2 * (x + 1)))
    }
    mat[i, ] <- c(x, y)
  }
  mat
}

This is relatively straightforward to convert to C++. We:

  • Add type declarations to all variables.

  • Use ( instead of [ to index into the matrix.

  • Include “Rmath.h” and call the functions with Rf_.

#include "cpp11/matrix.hpp"
#include "cpp11/doubles.hpp"
#include "Rmath.h"
using namespace cpp11;
namespace writable = cpp11::writable;

// C++ translation of gibbs_r(): returns an N x 2 matrix whose rows are the
// retained (x, y) draws, performing `thin` x/y updates before each row.
[[cpp11::register]] cpp11::doubles_matrix<> gibbs_cpp(int N, int thin) {
  writable::doubles_matrix<> draws(N, 2);
  double x = 0, y = 0;
  for (int row = 0; row < N; ++row) {
    for (int step = 0; step < thin; ++step) {
      // Rf_rgamma() is parameterised by scale, so pass the reciprocal of the
      // rate that the R version hands to rgamma().
      const double gamma_scale = 1. / (y * y + 4);
      x = Rf_rgamma(3., gamma_scale);

      const double x_plus_one = x + 1.;
      y = Rf_rnorm(1. / x_plus_one, 1. / sqrt(2. * x_plus_one));
    }
    // Matrices are indexed with (row, column), both starting at 0.
    draws(row, 0) = x;
    draws(row, 1) = y;
  }
  return draws;
}

Benchmarking the two implementations yields a significant speedup for running the loops in C++:

# Time the R and C++ samplers on the same workload. Each expression re-seeds
# the random number generator first so both start from the same RNG state.
bench::mark(
  r = {
    set.seed(42)
    gibbs_r(100, 10)
  },
  cpp = {
    set.seed(42)
    gibbs_cpp(100, 10)
  },
  # Ask bench to verify that the two expressions return equal results.
  check = TRUE,
  # Report the summaries relative to the fastest expression.
  relative = TRUE
)
#> # A tibble: 2 × 6
#>   expression   min median `itr/sec` mem_alloc `gc/sec`
#>   <bch:expr> <dbl>  <dbl>     <dbl>     <dbl>    <dbl>
#> 1 r           28.4   32.3       1       1251.      Inf
#> 2 cpp          1      1        33.2        1       NaN

R vectorisation versus C++ vectorisation

This example is adapted from “Rcpp is smoking fast for agent-based models in data frames”. The challenge is to predict a model response from three inputs. The basic R version of the predictor looks like:

# Scalar predictor: combine age, sex and ily into a single value, then clamp
# that value to the interval [0, 1].
vacc1a <- function(age, female, ily) {
  base <- 0.25 + 0.3 * 1 / (1 - exp(0.04 * age)) + 0.1 * ily
  scaled <- base * if (female) 1.25 else 0.75
  # max() enforces the lower bound, min() the upper bound.
  min(1, max(0, scaled))
}

We want to be able to apply this function to many inputs, so we might write a vector-input version using a for loop.

# Vector-input wrapper around vacc1a(): applies the scalar predictor to each
# position of the inputs in turn, using an explicit loop.
vacc1 <- function(age, female, ily) {
  count <- length(age)
  result <- numeric(count)
  for (idx in seq_len(count)) {
    result[idx] <- vacc1a(age[idx], female[idx], ily[idx])
  }
  result
}

If you’re familiar with R, you’ll have a gut feeling that this will be slow, and indeed it is. There are two ways we could attack this problem. If you have a good R vocabulary, you might immediately see how to vectorise the function (using ifelse(), pmin(), and pmax()). Alternatively, we could rewrite vacc1a() and vacc1() in C++, using our knowledge that loops and function calls have much lower overhead in C++.

Either approach is fairly straightforward. In R:

# Vectorised predictor: the same calculation as vacc1a(), applied to whole
# vectors at once with ifelse(), pmax() and pmin().
vacc2 <- function(age, female, ily) {
  base <- 0.25 + 0.3 * 1 / (1 - exp(0.04 * age)) + 0.1 * ily
  scaled <- base * ifelse(female, 1.25, 0.75)
  # Element-wise clamp to [0, 1].
  pmin(1, pmax(0, scaled))
}

(If you’ve worked with R a lot you might recognise some potential bottlenecks in this code: ifelse, pmin, and pmax are known to be slow, and could be replaced with p * 0.75 + p * 0.5 * female, p[p < 0] <- 0, p[p > 1] <- 1. You might want to try timing those variations.)

Or in C++:

#include "cpp11.hpp"
using namespace cpp11;
namespace writable = cpp11::writable;

// Scalar predictor: combine age, sex and ily into a single value, then clamp
// that value to the interval [0, 1].
[[cpp11::register]]
double vacc3a(double age, bool female, bool ily){
  const double base = 0.25 + 0.3 / (1 - exp(0.04 * age)) + 0.1 * ily;
  const double sex_factor = female ? 1.25 : 0.75;

  double p = base * sex_factor;
  // Clamp to [0, 1]; a NaN fails both comparisons and is returned unchanged.
  if (p < 0.0) {
    p = 0.0;
  }
  if (p > 1.0) {
    p = 1.0;
  }
  return p;
}

// Vector-input wrapper around vacc3a(): applies the scalar predictor at each
// position of `age`, reading the matching elements of `female` and `ily`.
[[cpp11::register]]
doubles vacc3(doubles age, logicals female, logicals ily) {
  int count = age.size();
  writable::doubles result(count);
  for (int idx = 0; idx < count; ++idx) {
    result[idx] = vacc3a(age[idx], female[idx], ily[idx]);
  }
  return result;
}

We next generate some sample data, and check that all three versions return the same values:

# Simulate inputs for 1000 individuals.
n <- 1000
age <- rnorm(n, mean = 50, sd = 10)
# Spell out TRUE/FALSE (T and F are ordinary variables that can be reassigned)
# and name `replace` in full instead of relying on partial argument matching.
female <- sample(c(TRUE, FALSE), n, replace = TRUE)
ily <- sample(c(TRUE, FALSE), n, prob = c(0.8, 0.2), replace = TRUE)

# All three implementations must agree on the same inputs.
stopifnot(
  all.equal(vacc1(age, female, ily), vacc2(age, female, ily)),
  all.equal(vacc1(age, female, ily), vacc3(age, female, ily))
)

The original blog post forgot to do this, and introduced a bug in the C++ version: it used 0.004 instead of 0.04. Finally, we can benchmark our three approaches:

# Time the three implementations on the same simulated inputs: the R loop
# (vacc1), the vectorised R version (vacc2) and the C++ loop (vacc3).
bench::mark(
  vacc1 = vacc1(age, female, ily),
  vacc2 = vacc2(age, female, ily),
  vacc3 = vacc3(age, female, ily)
)
#> # A tibble: 3 × 6
#>   expression      min   median `itr/sec` mem_alloc `gc/sec`
#>   <bch:expr> <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl>
#> 1 vacc1        1.51ms   1.76ms      558.    7.86KB     17.9
#> 2 vacc2       40.23µs  53.23µs    17729.  148.84KB     38.3
#> 3 vacc3       13.25µs  14.15µs    64369.   14.03KB     12.9

Not surprisingly, our original approach with loops is very slow. Vectorising in R gives a huge speedup, and we can eke out even more performance (about four times in the benchmark above) with the C++ loop. I was a little surprised that the C++ was so much faster, but it is because the R version has to create 11 vectors to store intermediate results, where the C++ code only needs to create 1.

Using cpp11 in a package

The same C++ code that is used with cpp_source() can also be bundled into a package. There are several benefits of moving code from a stand-alone C++ source file to a package:

  1. Your code can be made available to users without C++ development tools.

  2. Multiple source files and their dependencies are handled automatically by the R package build system.

  3. Packages provide additional infrastructure for testing, documentation, and consistency.

To add cpp11 to an existing package first put your C++ files in the src/ directory of your package. Then add the following to your DESCRIPTION file:

```
LinkingTo: cpp11
SystemRequirements: C++11
```

and add the following roxygen directive somewhere in your package’s R files. (A common location is R/pkgname-package.R)

```
#' @useDynLib pkgname, .registration = TRUE
```

You’ll then need to run devtools::document() to update your NAMESPACE file to include the useDynLib statement.

The easiest way to set this up is to call usethis::use_cpp11(), which will do the above steps for you automatically.

Before building the package, you’ll need to run cpp11::cpp_register(). This function scans the C++ files for [[cpp11::register]] attributes and generates the binding code required to make the functions available in R. Re-run cpp11::cpp_register() whenever functions are added, removed, or have their signatures changed. If you are using devtools to develop your package this is done automatically by the pkgbuild package when your package has LinkingTo: cpp11 in its DESCRIPTION file.

Learning more

C++ is a large, complex language that takes years to master. If you would like to dive deeper or write more complex functions, other resources I’ve found helpful in learning C++ are:

  • Effective C++ and Effective STL

  • C++ Annotations, aimed at knowledgeable users of C (or any other language using a C-like grammar, like Perl or Java) who would like to know more about, or make the transition to, C++.

  • Algorithm Libraries, which provides a more technical, but still concise, description of important STL concepts. (Follow the links under notes.)

Writing performant code may also require you to rethink your basic approach: a solid understanding of basic data structures and algorithms is very helpful here. That’s beyond the scope of this vignette, but I’d suggest the Algorithm Design Manual, MIT’s Introduction to Algorithms, and Algorithms by Robert Sedgewick and Kevin Wayne, which has a free online textbook and a matching Coursera course.

cpp11/inst/doc/cpp11.Rmd0000644000175000017500000012216114120442712014442 0ustar nileshnilesh--- title: "Get started with cpp11" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Get started with cpp11} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>", eval = as.logical(Sys.getenv("CPP11_EVAL", "false")) ) ``` *This content is adapted (with permission) from the [Rcpp chapter](https://adv-r.hadley.nz/rcpp.html) of Hadley Wickham's book Advanced R.* ## Introduction Sometimes R code just isn't fast enough. You've used profiling to figure out where your bottlenecks are, and you've done everything you can in R, but your code still isn't fast enough. In this vignette you'll learn how to improve performance by rewriting key functions in C++. This magic comes by way of the [cpp11](https://github.com/r-lib/cpp11) package. cpp11 makes it very simple to connect C++ to R. While it is _possible_ to write C or Fortran code for use in R, it will be painful by comparison. cpp11 provides a clean, approachable API that lets you write high-performance code, insulated from R's more complex C API. Typical bottlenecks that C++ can address include: * Loops that can't be easily vectorised because subsequent iterations depend on previous ones. * Recursive functions, or problems which involve calling functions millions of times. The overhead of calling a function in C++ is much lower than in R. * Problems that require advanced data structures and algorithms that R doesn't provide. Through the standard template library (STL), C++ has efficient implementations of many important data structures, from ordered maps to double-ended queues. The aim of this vignette is to discuss only those aspects of C++ and cpp11 that are absolutely necessary to help you eliminate bottlenecks in your code. 
We won't spend much time on advanced features like object-oriented programming or templates because the focus is on writing small, self-contained functions, not big programs. A working knowledge of C++ is helpful, but not essential. Many good tutorials and references are freely available, including [learncpp.com](https://www.learncpp.com/) and [cppreference.com](https://en.cppreference.com/w/cpp). For more advanced topics, the _Effective C++_ series by Scott Meyers is a popular choice. ### Outline * Section [intro](#intro) teaches you how to write C++ by converting simple R functions to their C++ equivalents. You'll learn how C++ differs from R, and what the key scalar, vector, and matrix classes are called. * Section [cpp_source](#cpp-source) shows you how to use `cpp11::cpp_source()` to load a C++ file from disk in the same way you use `source()` to load a file of R code. * Section [classes](#classes) discusses how to modify attributes from cpp11, and mentions some of the other important classes. * Section [na](#na) teaches you how to work with R's missing values in C++. * Section [stl](#stl) shows you how to use some of the most important data structures and algorithms from the standard template library, or STL, built-in to C++. * Section [case-studies](#case-studies) shows two real case studies where cpp11 was used to get considerable performance improvements. * Section [package](#package) teaches you how to add C++ code to an R package. * Section [more](#more) concludes the vignette with pointers to more resources to help you learn cpp11 and C++. ### Prerequisites We'll use [cpp11](https://github.com/r-lib/cpp11) to call C++ from R: ```{r setup} library(cpp11) ``` You'll also need a working C++ compiler. To get it: * On Windows, install [Rtools](https://CRAN.R-project.org/bin/windows/Rtools/). * On Mac, install Xcode from the app store. * On Linux, `sudo apt-get install r-base-dev` or similar.
## Getting started with C++ {#intro} `cpp_function()` allows you to write C++ functions in R: ```{r add} cpp_function('int add(int x, int y, int z) { int sum = x + y + z; return sum; }') # add works like a regular R function add add(1, 2, 3) ``` When you run the above code, cpp11 will compile the C++ code and construct an R function that connects to the compiled C++ function. There's a lot going on underneath the hood but cpp11 takes care of all the details so you don't need to worry about them. The following sections will teach you the basics by translating simple R functions to their C++ equivalents. We'll start simple with a function that has no inputs and a scalar output, and then make it progressively more complicated: * Scalar input and scalar output * Vector input and scalar output * Vector input and vector output * Matrix input and vector output ### No inputs, scalar output Let's start with a very simple function. It has no arguments and always returns the integer 1: ```{r one-r} one <- function() 1L ``` The equivalent C++ function is: ```cpp int one() { return 1; } ``` We can compile and use this from R with `cpp_function()` ```{r one-cpp} cpp_function('int one() { return 1; }') ``` This small function illustrates a number of important differences between R and C++: * The syntax to create a function looks like the syntax to call a function; you don't use assignment to create functions as you do in R. * You must declare the type of output the function returns. This function returns an `int` (a scalar integer). The classes for the most common types of R vectors are: `doubles`, `integers`, `strings`, and `logicals`. * Scalars and vectors are different. The scalar equivalents of numeric, integer, character, and logical vectors are: `double`, `int`, `String`, and `bool`. * You must use an explicit `return` statement to return a value from a function. * Every statement is terminated by a `;`. 
### Scalar input, scalar output The next example function implements a scalar version of the `sign()` function which returns 1 if the input is positive, and -1 if it's negative: ```{r sign} sign_r <- function(x) { if (x > 0) { 1 } else if (x == 0) { 0 } else { -1 } } cpp_function('int sign_cpp(int x) { if (x > 0) { return 1; } else if (x == 0) { return 0; } else { return -1; } }') ``` In the C++ version: * We declare the type of each input in the same way we declare the type of the output. While this makes the code a little more verbose, it also makes clear the type of input the function needs. * The `if` syntax is identical --- while there are some big differences between R and C++, there are also lots of similarities! C++ also has a `while` statement that works the same way as R's. As in R you can use `break` to exit the loop, but to skip one iteration you need to use `continue` instead of `next`. ### Vector input, scalar output One big difference between R and C++ is that the cost of loops is much lower in C++. For example, we could implement the `sum` function in R using a loop. If you've been programming in R a while, you'll probably have a visceral reaction to this function! ```{r sum-r} sum_r <- function(x) { total <- 0 for (i in seq_along(x)) { total <- total + x[i] } total } ``` In C++, loops have very little overhead, so it's fine to use them. In Section [stl](#stl), you'll see alternatives to `for` loops that more clearly express your intent; they're not faster, but they can make your code easier to understand. ```{r sum-cpp} cpp_function('double sum_cpp(doubles x) { int n = x.size(); double total = 0; for(int i = 0; i < n; ++i) { total += x[i]; } return total; }') ``` The C++ version is similar, but: * To find the length of the vector, we use the `.size()` method, which returns an integer. C++ methods are called with `.` (i.e., a full stop). * The `for` statement has a different syntax: `for(init; check; increment)`. 
This loop is initialised by creating a new variable called `i` with value 0. Before each iteration we check that `i < n`, and terminate the loop if it's not. After each iteration, we increment the value of `i` by one, using the special prefix operator `++` which increases the value of `i` by 1. * In C++, vector indices start at 0, which means that the last element is at position `n - 1`. I'll say this again because it's so important: __IN C++, VECTOR INDICES START AT 0__! This is a very common source of bugs when converting R functions to C++. * Use `=` for assignment, not `<-`. * C++ provides operators that modify in-place: `total += x[i]` is equivalent to `total = total + x[i]`. Similar in-place operators are `-=`, `*=`, and `/=`. This is a good example of where C++ is much more efficient than R. As shown by the following microbenchmark, `sum_cpp()` is competitive with the built-in (and highly optimised) `sum()`, while `sum_r()` is several orders of magnitude slower. ```{r sum-bench} x <- runif(1e3) bench::mark( sum(x), sum_cpp(x), sum_r(x) )[1:6] ``` ### Vector input, vector output Next we'll create a function that computes the Euclidean distance between a value and a vector of values: ```{r pdist-r} pdist_r <- function(x, ys) { sqrt((x - ys) ^ 2) } ``` In R, it's not obvious that we want `x` to be a scalar from the function definition, and we'd need to make that clear in the documentation. That's not a problem in the C++ version because we have to be explicit about types: ```{r pdist-cpp} cpp_function('doubles pdist_cpp(double x, doubles ys) { int n = ys.size(); writable::doubles out(n); for(int i = 0; i < n; ++i) { out[i] = sqrt(pow(ys[i] - x, 2.0)); } return out; }') ``` This function introduces a few new concepts: * Because we are creating a new vector we need to use `writable::doubles` rather than the read-only `doubles`. * We create a new numeric vector of length `n` with a constructor: `cpp11::writable::doubles out(n)`.
Another useful way of making a vector is to copy an existing one: `cpp11::doubles zs(ys)`. * C++ uses `pow()`, not `^`, for exponentiation. Note that because the R version is fully vectorised, it's already going to be fast. ```{r} y <- runif(1e6) bench::mark( pdist_r(0.5, y), pdist_cpp(0.5, y) )[1:6] ``` On my computer, it takes around 5 ms with a 1 million element `y` vector. The C++ function is about 2.5 times faster, ~2 ms, but assuming it took you 10 minutes to write the C++ function, you'd need to run it ~200,000 times to make rewriting worthwhile. The reason why the C++ function is faster is subtle, and relates to memory management. The R version needs to create an intermediate vector the same length as y (`x - ys`), and allocating memory is an expensive operation. The C++ function avoids this overhead because it uses an intermediate scalar. ```{r, include = FALSE} # 5e-3 * x == 2e-3 * x + 10 * 60 600 / (5e-3 - 2e-3) ``` ### Using cpp_source {#cpp-source} So far, we've used inline C++ with `cpp_function()`. This makes presentation simpler, but for real problems, it's usually easier to use stand-alone C++ files and then source them into R using `cpp_source()`. This lets you take advantage of text editor support for C++ files (e.g., syntax highlighting) as well as making it easier to identify the line numbers in compilation errors. Your stand-alone C++ file should have extension `.cpp`, and needs to start with: ```cpp #include "cpp11.hpp" using namespace cpp11; ``` And for each function that you want available within R, you need to prefix it with: ```cpp [[cpp11::register]] ``` If you're familiar with roxygen2, you might wonder how this relates to `@export`. `cpp11::register` registers a C++ function to be called from R. `@export` controls whether a function is exported from a package and made available to the user. To compile the C++ code, use `cpp_source("path/to/file.cpp")`. This will create the matching R functions and add them to your current session. 
Note that these functions can not be saved in a `.Rdata` file and reloaded in a later session; they must be recreated each time you restart R. This example also illustrates a different kind of a `for` loop, a for-each loop. ```{cpp11} #include "cpp11/doubles.hpp" using namespace cpp11; [[cpp11::register]] double mean_cpp(doubles x) { int n = x.size(); double total = 0; for(double value : x) { total += value; } return total / n; } ``` NB: if you run this code, you'll notice that `mean_cpp()` is faster than the built-in `mean()`. This is because it trades numerical accuracy for speed. For the remainder of this vignette C++ code will be presented stand-alone rather than wrapped in a call to `cpp_function`. If you want to try compiling and/or modifying the examples you should paste them into a C++ source file that includes the elements described above. This is easy to do in RMarkdown by using `{cpp11}` instead of `{r}` at the beginning of your code blocks. ### Exercises 1. With the basics of C++ in hand, it's now a great time to practice by reading and writing some simple C++ functions. For each of the following functions, read the code and figure out what the corresponding base R function is. You might not understand every part of the code yet, but you should be able to figure out the basics of what the function does. 
```{cpp11} #include "cpp11.hpp" using namespace cpp11; namespace writable = cpp11::writable; [[cpp11::register]] double f1(doubles x) { int n = x.size(); double y = 0; for(int i = 0; i < n; ++i) { y += x[i] / n; } return y; } [[cpp11::register]] doubles f2(doubles x) { int n = x.size(); writable::doubles out(n); out[0] = x[0]; for(int i = 1; i < n; ++i) { out[i] = out[i - 1] + x[i]; } return out; } [[cpp11::register]] bool f3(logicals x) { int n = x.size(); for(int i = 0; i < n; ++i) { if (x[i]) { return true; } } return false; } [[cpp11::register]] int f4(cpp11::function pred, list x) { int n = x.size(); for(int i = 0; i < n; ++i) { logicals res(pred(x[i])); if (res[0]) { return i + 1; } } return 0; } ``` 1. To practice your function writing skills, convert the following functions into C++. For now, assume the inputs have no missing values. 1. `all()`. 2. `cumprod()`, `cummin()`, `cummax()`. 3. `diff()`. Start by assuming lag 1, and then generalise for lag `n`. 4. `range()`. 5. `var()`. Read about the approaches you can take on [Wikipedia](https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance). Whenever implementing a numerical algorithm, it's always good to check what is already known about the problem. ## Other classes {#classes} You've already seen the basic vector classes (`integers`, `doubles`, `logicals`, `strings`) and their scalar (`int`, `double`, `bool`, `string`) equivalents. cpp11 also provides wrappers for other base data types. The most important are for lists and data frames, functions, and attributes, as described below. ### Lists and data frames cpp11 also provides `list` and `data_frame` classes, but they are more useful for output than input. This is because lists and data frames can contain arbitrary classes but C++ needs to know their classes in advance. If the list has known structure (e.g., it's an S3 object), you can extract the components and manually convert them to their C++ equivalents with `as_cpp()`. 
For example, the object created by `lm()`, the function that fits a linear model, is a list whose components are always of the same type. The following code illustrates how you might extract the mean percentage error (`mpe()`) of a linear model. This isn't a good example of when to use C++, because it's so easily implemented in R, but it shows how to work with an important S3 class. Note the use of `Rf_inherits()` and the `stop()` to check that the object really is a linear model. ```{cpp11} #include "cpp11.hpp" using namespace cpp11; [[cpp11::register]] double mpe(list mod) { if (!Rf_inherits(mod, "lm")) { stop("Input must be a linear model"); } doubles resid(mod["residuals"]); doubles fitted(mod["fitted.values"]); int n = resid.size(); double err = 0; for(int i = 0; i < n; ++i) { err += resid[i] / (fitted[i] + resid[i]); } return err / n; } ``` ```{r} mod <- lm(mpg ~ wt, data = mtcars) mpe(mod) ``` ### Functions {#functions-cpp11} You can put R functions in an object of type `function`. This makes calling an R function from C++ straightforward. The only challenge is that we don't know what type of output the function will return, so we use the catchall type `sexp`. This stands for S-Expression and is used as the type of all R Objects in the internal C code. ```{cpp11} #include "cpp11.hpp" using namespace cpp11; namespace writable = cpp11::writable; [[cpp11::register]] sexp call_with_one(function f) { return f(1); } ``` ```{r} call_with_one(function(x) x + 1) call_with_one(paste) ``` Calling R functions with positional arguments is obvious: ```cpp f("y", 1); ``` But you need a special syntax for named arguments: ```cpp using namespace cpp11::literals; f("x"_nm = "y", "value"_nm = 1); ``` ### Attributes All R objects have attributes, which can be queried and modified with `.attr()`. cpp11 also provides `.names()` as an alias for the `names` attribute. The following code snippet illustrates these methods. 
Note the use of `{}` [initializer list](https://en.cppreference.com/w/cpp/utility/initializer_list) syntax. This allows you to create an R vector from C++ scalar values: ```{r attribs, engine = "cpp11"} #include "cpp11.hpp" using namespace cpp11; namespace writable = cpp11::writable; [[cpp11::register]] doubles attribs() { writable::doubles out = {1., 2., 3.}; out.names() = {"a", "b", "c"}; out.attr("my-attr") = "my-value"; out.attr("class") = "my-class"; return out; } ``` ## Missing values {#na} If you're working with missing values, you need to know two things: * How R's missing values behave in C++'s scalars (e.g., `double`). * How to get and set missing values in vectors (e.g., `doubles`). ### Scalars The following code explores what happens when you take one of R's missing values, coerce it into a scalar, and then coerce back to an R vector. Note that this kind of experimentation is a useful way to figure out what any operation does. ```{r missings, engine = "cpp11"} #include "cpp11.hpp" using namespace cpp11; [[cpp11::register]] list scalar_missings() { int int_s = NA_INTEGER; r_string chr_s = NA_STRING; bool lgl_s = NA_LOGICAL; double num_s = NA_REAL; return writable::list({as_sexp(int_s), as_sexp(chr_s), as_sexp(lgl_s), as_sexp(num_s)}); } ``` ```{r} str(scalar_missings()) ``` With the exception of `bool`, things look pretty good here: all of the missing values have been preserved. However, as we'll see in the following sections, things are not quite as straightforward as they seem. #### Integers With integers, missing values are stored as the smallest integer. If you don't do anything to them, they'll be preserved. But, since C++ doesn't know that the smallest integer has this special behaviour, if you do anything to it you're likely to get an incorrect value: for example, `cpp_eval('NA_INTEGER + 1')` gives -2147483647. So if you want to work with missing values in integers, either use a length 1 `integers` or be very careful with your code. 
#### Doubles With doubles, you may be able to get away with ignoring missing values and working with NaNs (not a number). This is because R's NA is a special type of IEEE 754 floating point number NaN. So any logical expression that involves a NaN (or in C++, NAN) always evaluates as FALSE: ```{r} cpp_eval("NAN == 1") cpp_eval("NAN < 1") cpp_eval("NAN > 1") cpp_eval("NAN == NAN") ``` (Here I'm using `cpp_eval()` which allows you to see the result of running a single C++ expression, making it excellent for this sort of interactive experimentation.) But be careful when combining them with Boolean values: ```{r} cpp_eval("NAN && TRUE") cpp_eval("NAN || FALSE") ``` However, in numeric contexts NaNs will propagate NAs: ```{r} cpp_eval("NAN + 1") cpp_eval("NAN - 1") cpp_eval("NAN / 1") cpp_eval("NAN * 1") ``` ### Strings `String` is a scalar string class introduced by cpp11, so it knows how to deal with missing values. ### Boolean C++'s `bool` has two possible values (`true` or `false`), a logical vector in R has three (`TRUE`, `FALSE`, and `NA`). If you coerce a length 1 logical vector, make sure it doesn't contain any missing values; otherwise they will be converted to TRUE. One way to fix this is to use `int` instead, as this can represent `TRUE`, `FALSE`, and `NA`. ### Vectors {#vectors-cpp11} With vectors, you need to use a missing value specific to the type of vector, `NA_REAL`, `NA_INTEGER`, `NA_LOGICAL`, `NA_STRING`: ```{r, engine = "cpp11"} #include "cpp11.hpp" using namespace cpp11; namespace writable = cpp11::writable; [[cpp11::register]] list missing_sampler() { return writable::list({ writable::doubles({NA_REAL}), writable::integers({NA_INTEGER}), writable::logicals({r_bool(NA_LOGICAL)}), writable::strings({NA_STRING}) }); } ``` ```{r} str(missing_sampler()) ``` ### Exercises 1. Rewrite any of the functions from the first exercise to deal with missing values. If `na_rm` is true, ignore the missing values. 
If `na_rm` is false, return a missing value if the input contains any missing values. Some good functions to practice with are `min()`, `max()`, `range()`, `mean()`, and `var()`. 1. Rewrite `cumsum()` and `diff()` so they can handle missing values. Note that these functions have slightly more complicated behaviour. ## Standard Template Library {#stl} The real strength of C++ is revealed when you need to implement more complex algorithms. The standard template library (STL) provides a set of extremely useful data structures and algorithms. This section will explain some of the most important algorithms and data structures and point you in the right direction to learn more. I can't teach you everything you need to know about the STL, but hopefully the examples will show you the power of the STL, and persuade you that it's useful to learn more. If you need an algorithm or data structure that isn't implemented in STL, one place to look is [boost](https://www.boost.org/doc/). Installing boost on your computer is beyond the scope of this vignette, but once you have it installed, you can use boost data structures and algorithms by including the appropriate header file with (e.g.) `#include `. ### Using iterators Iterators are used extensively in the STL: many functions either accept or return iterators. They are the next step up from basic loops, abstracting away the details of the underlying data structure. Iterators have three main operators: 1. Advance with `++`. 1. Get the value they refer to, or __dereference__, with `*`. 1. Compare with `==`. For example we could re-write our sum function using iterators: ```{r, engine = "cpp11"} #include "cpp11.hpp" using namespace cpp11; [[cpp11::register]] double sum2(doubles x) { double total = 0; for(auto it = x.begin(); it != x.end(); ++it) { total += *it; } return total; } ``` The main changes are in the for loop: * We start at `x.begin()` and loop until we get to `x.end()`. 
A small optimization is to store the value of the end iterator so we don't need to look it up each time. This only saves about 2 ns per iteration, so it's only important when the calculations in the loop are very simple. * Instead of indexing into x, we use the dereference operator to get its current value: `*it`. * Notice we use `auto` rather than giving the type of the iterator. This code can be simplified still further through the use of a C++11 feature: range-based for loops. ```{r, engine = "cpp11"} #include "cpp11.hpp" using namespace cpp11; [[cpp11::register]] double sum3(doubles xs) { double total = 0; for(auto x : xs) { total += x; } return total; } ``` Iterators also allow us to use the C++ equivalents of the apply family of functions. For example, we could again rewrite `sum()` to use the `accumulate()` function, which takes a starting and an ending iterator, and adds up all the values in the vector. The third argument to `accumulate` gives the initial value: it's particularly important because this also determines the data type that `accumulate` uses (so we use `0.0` and not `0` so that `accumulate` uses a `double`, not an `int`.). To use `accumulate()` we need to include the `` header. ```{r, engine = "cpp11"} #include #include "cpp11.hpp" using namespace cpp11; [[cpp11::register]] double sum4(doubles x) { return std::accumulate(x.begin(), x.end(), 0.0); } ``` ```{r, include = FALSE, error = FALSE} # Verify that our sum implementations work local({ x <- c(.5, .1, .3, .7, 12.) stopifnot(identical(sum(x), sum2(x))) stopifnot(identical(sum(x), sum3(x))) stopifnot(identical(sum(x), sum4(x))) }) ``` ### Algorithms The `` header provides a large number of algorithms that work with iterators. A good reference is available at . For example, we could write a basic cpp11 version of `findInterval()` that takes two arguments, a vector of values and a vector of breaks, and locates the bin that each x falls into. This shows off a few more advanced iterator features. 
Read the code below and see if you can figure out how it works. ```{r, engine = "cpp11"} #include #include "cpp11.hpp" using namespace cpp11; [[cpp11::register]] integers findInterval2(doubles x, doubles breaks) { writable::integers out(x.size()); auto out_it = out.begin(); for (auto&& val : x) { auto pos = std::upper_bound(breaks.begin(), breaks.end(), val); *out_it = std::distance(breaks.begin(), pos); ++out_it; } return out; } ``` ```{r, include = FALSE, error = FALSE} # Verify that our findInterval2 implementation works local({ n <- 1e3 x <- sort(round(stats::rt(n, df = 2), 2)) tt <- c(-n, seq(-2, 2, length = n + 1), n) stopifnot(identical(findInterval(tt, x), findInterval2(tt, x))) }) ``` The key points are: * We step through two iterators (input and output) simultaneously. * We can assign into an dereferenced iterator (`out_it`) to change the values in `out`. * `upper_bound()` returns an iterator. If we wanted the value of the `upper_bound()` we could dereference it; to figure out its location, we use the `distance()` function. When in doubt, it is generally better to use algorithms from the STL than hand rolled loops. In _Effective STL_, Scott Meyers gives three reasons: efficiency, correctness, and maintainability. Algorithms from the STL are written by C++ experts to be extremely efficient, and they have been around for a long time so they are well tested. Using standard algorithms also makes the intent of your code more clear, helping to make it more readable and more maintainable. ### Data structures {#data-structures-cpp11} The STL provides a large set of data structures: `array`, `bitset`, `list`, `forward_list`, `map`, `multimap`, `multiset`, `priority_queue`, `queue`, `deque`, `set`, `stack`, `unordered_map`, `unordered_set`, `unordered_multimap`, `unordered_multiset`, and `vector`. The most important of these data structures are the `vector`, the `unordered_set`, and the `unordered_map`. 
We'll focus on these three in this section, but using the others is similar: they just have different performance trade-offs. For example, the `deque` (pronounced "deck") has a very similar interface to vectors but a different underlying implementation that has different performance trade-offs. You may want to try it for your problem. A good reference for STL data structures is --- I recommend you keep it open while working with the STL. cpp11 knows how to convert from many STL data structures to their R equivalents, so you can return them from your functions without explicitly converting to R data structures. ### Vectors {#vectors-stl} An STL vector is very similar to an R vector, except that it grows efficiently. This makes STL vectors appropriate to use when you don't know in advance how big the output will be. Vectors are templated, which means that you need to specify the type of object the vector will contain when you create it: `vector`, `vector`, `vector`, `vector`. You can access individual elements of a vector using the standard `[]` notation, and you can add a new element to the end of the vector using `.push_back()`. If you have some idea in advance how big the vector will be, you can use `.reserve()` to allocate sufficient storage. The following code implements run length encoding (`rle()`). It produces two vectors of output: a vector of values, and a vector `lengths` giving how many times each element is repeated. It works by looping through the input vector `x` comparing each value to the previous: if it's the same, then it increments the last value in `lengths`; if it's different, it adds the value to the end of `values`, and sets the corresponding length to 1. 
```{r, engine = "cpp11"} #include "cpp11.hpp" #include using namespace cpp11; namespace writable = cpp11::writable; [[cpp11::register]] list rle_cpp(doubles x) { std::vector lengths; std::vector values; // Initialise first value int i = 0; double prev = x[0]; values.push_back(prev); lengths.push_back(1); for(auto it = x.begin() + 1; it != x.end(); ++it) { if (prev == *it) { lengths[i]++; } else { values.push_back(*it); lengths.push_back(1); i++; prev = *it; } } return writable::list({ "lengths"_nm = lengths, "values"_nm = values }); } ``` (An alternative implementation would be to replace `i` with the iterator `lengths.rbegin()` which always points to the last element of the vector. You might want to try implementing that.) Other methods of a vector are described at . ### Sets Sets maintain a unique set of values, and can efficiently tell if you've seen a value before. They are useful for problems that involve duplicates or unique values (like `unique`, `duplicated`, or `in`). C++ provides both ordered (`std::set`) and unordered sets (`std::unordered_set`), depending on whether or not order matters for you. Unordered sets can somtimes be much faster (because they use a hash table internally rather than a tree). Often even if you need an ordered set, you could consider using an unordered set and then sorting the output. Benchmarking with your expected dataset is the best way to determine which is fastest for your data. Like vectors, sets are templated, so you need to request the appropriate type of set for your purpose: `unordered_set`, `unordered_set`, etc. More details are available at and . The following function uses an unordered set to implement an equivalent to `duplicated()` for integer vectors. Note the use of `seen.insert(x[i]).second`. `insert()` returns a pair, the `.first` value is an iterator that points to element and the `.second` value is a Boolean that's true if the value was a new addition to the set. 
```{r, engine = "cpp11"} #include #include "cpp11.hpp" using namespace cpp11; namespace writable = cpp11::writable; [[cpp11::register]] logicals duplicated_cpp(integers x) { std::unordered_set seen; int n = x.size(); writable::logicals out(n); for (int i = 0; i < n; ++i) { out[i] = !seen.insert(x[i]).second; } return out; } ``` ### Exercises To practice using the STL algorithms and data structures, implement the following using R functions in C++, using the hints provided: 1. `median.default()` using `partial_sort`. 1. `%in%` using `unordered_set` and the `find()` or `count()` methods. 1. `unique()` using an `unordered_set` (challenge: do it in one line!). 1. `min()` using `std::min()`, or `max()` using `std::max()`. 1. `which.min()` using `min_element`, or `which.max()` using `max_element`. 1. `setdiff()`, `union()`, and `intersect()` for integers using sorted ranges and `set_union`, `set_intersection` and `set_difference`. ## Case studies {#case-studies} The following case studies illustrate some real life uses of C++ to replace slow R code. ### Gibbs sampler The following case study updates an example [blogged about](http://dirk.eddelbuettel.com/blog/2011/07/14/) by Dirk Eddelbuettel, illustrating the conversion of a Gibbs sampler in R to C++. The R and C++ code shown below is very similar (it only took a few minutes to convert the R version to the C++ version), but runs about 30 times faster on my computer. Dirk's blog post also shows another way to make it even faster: using the faster random number generator functions in GSL (easily accessible from R through the RcppGSL package) can make it another two to three times faster. The R code is as follows: ```{r} gibbs_r <- function(N, thin) { mat <- matrix(nrow = N, ncol = 2) x <- y <- 0 for (i in 1:N) { for (j in 1:thin) { x <- rgamma(1, 3, y * y + 4) y <- rnorm(1, 1 / (x + 1), 1 / sqrt(2 * (x + 1))) } mat[i, ] <- c(x, y) } mat } ``` This is relatively straightforward to convert to C++. 
We: * Add type declarations to all variables. * Use `(` instead of `[` to index into the matrix. * Include "Rmath.h" and call the functions with `Rf_`. ```{r, engine = "cpp11"} #include "cpp11/matrix.hpp" #include "cpp11/doubles.hpp" #include "Rmath.h" using namespace cpp11; namespace writable = cpp11::writable; [[cpp11::register]] cpp11::doubles_matrix<> gibbs_cpp(int N, int thin) { writable::doubles_matrix<> mat(N, 2); double x = 0, y = 0; for (int i = 0; i < N; i++) { for (int j = 0; j < thin; j++) { x = Rf_rgamma(3., 1. / double(y * y + 4)); y = Rf_rnorm(1. / (x + 1.), 1. / sqrt(2. * (x + 1.))); } mat(i, 0) = x; mat(i, 1) = y; } return mat; } ``` Benchmarking the two implementations yields a significant speedup for running the loops in C++: ```{r} bench::mark( r = { set.seed(42) gibbs_r(100, 10) }, cpp = { set.seed(42) gibbs_cpp(100, 10) }, check = TRUE, relative = TRUE ) ``` ### R vectorisation versus C++ vectorisation This example is adapted from ["Rcpp is smoking fast for agent-based models in data frames"](https://gweissman.github.io/post/rcpp-is-smoking-fast-for-agent-based-models-in-data-frames/). The challenge is to predict a model response from three inputs. The basic R version of the predictor looks like: ```{r} vacc1a <- function(age, female, ily) { p <- 0.25 + 0.3 * 1 / (1 - exp(0.04 * age)) + 0.1 * ily p <- p * if (female) 1.25 else 0.75 p <- max(0, p) p <- min(1, p) p } ``` We want to be able to apply this function to many inputs, so we might write a vector-input version using a for loop. ```{r} vacc1 <- function(age, female, ily) { n <- length(age) out <- numeric(n) for (i in seq_len(n)) { out[i] <- vacc1a(age[i], female[i], ily[i]) } out } ``` If you're familiar with R, you'll have a gut feeling that this will be slow, and indeed it is. There are two ways we could attack this problem. If you have a good R vocabulary, you might immediately see how to vectorise the function (using `ifelse()`, `pmin()`, and `pmax()`). 
Alternatively, we could rewrite `vacc1a()` and `vacc1()` in C++, using our knowledge that loops and function calls have much lower overhead in C++. Either approach is fairly straightforward. In R: ```{r} vacc2 <- function(age, female, ily) { p <- 0.25 + 0.3 * 1 / (1 - exp(0.04 * age)) + 0.1 * ily p <- p * ifelse(female, 1.25, 0.75) p <- pmax(0, p) p <- pmin(1, p) p } ``` (If you've worked R a lot you might recognise some potential bottlenecks in this code: `ifelse`, `pmin`, and `pmax` are known to be slow, and could be replaced with `p * 0.75 + p * 0.5 * female`, `p[p < 0] <- 0`, `p[p > 1] <- 1`. You might want to try timing those variations.) Or in C++: ```{r engine = "cpp11"} #include "cpp11.hpp" using namespace cpp11; namespace writable = cpp11::writable; [[cpp11::register]] double vacc3a(double age, bool female, bool ily){ double p = 0.25 + 0.3 * 1 / (1 - exp(0.04 * age)) + 0.1 * ily; p = p * (female ? 1.25 : 0.75); p = std::max(p, 0.0); p = std::min(p, 1.0); return p; } [[cpp11::register]] doubles vacc3(doubles age, logicals female, logicals ily) { int n = age.size(); writable::doubles out(n); for(int i = 0; i < n; ++i) { out[i] = vacc3a(age[i], female[i], ily[i]); } return out; } ``` We next generate some sample data, and check that all three versions return the same values: ```{r} n <- 1000 age <- rnorm(n, mean = 50, sd = 10) female <- sample(c(T, F), n, rep = TRUE) ily <- sample(c(T, F), n, prob = c(0.8, 0.2), rep = TRUE) stopifnot( all.equal(vacc1(age, female, ily), vacc2(age, female, ily)), all.equal(vacc1(age, female, ily), vacc3(age, female, ily)) ) ``` The original blog post forgot to do this, and introduced a bug in the C++ version: it used `0.004` instead of `0.04`. Finally, we can benchmark our three approaches: ```{r} bench::mark( vacc1 = vacc1(age, female, ily), vacc2 = vacc2(age, female, ily), vacc3 = vacc3(age, female, ily) ) ``` Not surprisingly, our original approach with loops is very slow. 
Vectorising in R gives a huge speedup, and we can eke out even more performance (about ten times) with the C++ loop. I was a little surprised that the C++ was so much faster, but it is because the R version has to create 11 vectors to store intermediate results, where the C++ code only needs to create 1. ## Using cpp11 in a package {#package} The same C++ code that is used with `cpp_source()` can also be bundled into a package. There are several benefits of moving code from a stand-alone C++ source file to a package: 1. Your code can be made available to users without C++ development tools. 1. Multiple source files and their dependencies are handled automatically by the R package build system. 1. Packages provide additional infrastructure for testing, documentation, and consistency. To add `cpp11` to an existing package first put your C++ files in the `src/` directory of your package. Then add the following to your `DESCRIPTION` file: ``` LinkingTo: cpp11 SystemRequirements: C++11 ``` and add the following [roxygen](https://roxygen2.r-lib.org/) directive somewhere in your package's R files. (A common location is `R/pkgname-package.R`) ``` #' @useDynLib pkgname, .registration = TRUE ``` You'll then need to run [`devtools::document()`](https://devtools.r-lib.org/reference/document.html) to update your `NAMESPACE` file to include the `useDynLib` statement. The easiest way to set this up is to call `usethis::use_cpp11()`, which will do the above steps for your automatically. Before building the package, you'll need to run `cpp11::cpp_register()`. This function scans the C++ files for `[[cpp11::register]]` attributes and generates the binding code required to make the functions available in R. Re-run `cpp11::cpp_register()` whenever functions are added, removed, or have their signatures changed. If you are using `devtools` to develop your package this is done automatically by the pkgbuild package when your package has `LinkingTo: cpp11` in its DESCRIPTION file. 
## Learning more {#more} C++ is a large, complex language that takes years to master. If you would like to dive deeper or write more complex functions other resources I've found helpful in learning C++ are: * [_Effective C++_](https://www.aristeia.com/books.html) and [_Effective STL_](https://www.aristeia.com/books.html) * [_C++ Annotations_](http://www.icce.rug.nl/documents/cplusplus/cplusplus.html), aimed at knowledgeable users of C (or any other language using a C-like grammar, like Perl or Java) who would like to know more about, or make the transition to, C++. * [_Algorithm Libraries_](https://www.cs.helsinki.fi/u/tpkarkka/alglib/k06/), which provides a more technical, but still concise, description of important STL concepts. (Follow the links under notes.) Writing performant code may also require you to rethink your basic approach: a solid understanding of basic data structures and algorithms is very helpful here. That's beyond the scope of this vignette, but I'd suggest the [_Algorithm Design Manual_](https://www.algorist.com/) MIT's [_Introduction to Algorithms_](https://web.archive.org/web/20200604134756/https://ocw.mit.edu/courses/electrical-engineering-and-computer-science/6-046j-introduction-to-algorithms-sma-5503-fall-2005/), _Algorithms_ by Robert Sedgewick and Kevin Wayne which has a free [online textbook](http://algs4.cs.princeton.edu/home/) and a matching [Coursera course](https://www.coursera.org/learn/algorithms-part1). 
cpp11/inst/doc/motivations.Rmd0000644000175000017500000005251514140020654016076 0ustar nileshnilesh--- title: "Motivations for cpp11" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Motivations for cpp11} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>", eval = as.logical(Sys.getenv("CPP11_EVAL", "false")) ) print_cpp <- function(filename) { cat("```c++", readLines(filename), "```", sep = "\n") } library(cpp11) should_run_benchmarks <- function(x) { get("requireNamespace")("cpp11test", quietly = TRUE) && asNamespace("cpp11test")$should_run_benchmarks() } ``` # Motivations R and S have a long history of interacting with compiled languages. In fact the original version of S written in the late 1970s was mainly a wrapper around FORTRAN routines. [(History-of-S)](https://www.r-project.org/conferences/useR-2006/Slides/Chambers.pdf) Released in 2000, the [cxx](https://cran.r-project.org/package=cxx) package was an early prototype of C++ bindings to R. [Rcpp](https://cran.r-project.org/package=Rcpp) was first published to CRAN in 2008, and [Rcpp11](https://cran.r-project.org/package=Rcpp11) in 2014. Of these `Rcpp` has by far the widest adoption, with over 2000 reverse dependencies as of 2020. Rcpp has been a widely successful project, however over the years a number of issues and additional C++ features have arisen. Adding these features to Rcpp would require a great deal of work, or in some cases would be impossible without severely breaking backwards compatibility. cpp11 is a ground up rewrite of C++ bindings to R with different design trade-offs and features. Changes that motivated cpp11 include: - Enforcing [copy-on-write semantics](#copy-on-write-semantics). - Improving the [safety](#improve-safety) of using the R API from C++ code. - Supporting [ALTREP objects](#altrep-support). - Using [UTF-8 strings](#utf-8-everywhere) everywhere. 
- Applying newer [C++11 features](#c11-features). - Having a more straightforward, [simpler implementation](#simpler-implementation). - Faster [compilation time](#compilation-speed) with lower memory requirements. - Being *completely* [header only](#header-only) to avoid ABI issues. - Capable of [vendoring](#vendoring) if desired. - More robust [protection](#protection) using a much more efficient linked list data structure. - [Growing vectors](#growing-vectors) more efficiently. ## Copy-on-write semantics R uses [copy-on-write](https://adv-r.hadley.nz/names-values.html#copy-on-modify) (also called copy-on-modify) semantics. Lets say you have two variables `x` and `y` that both point to the same underlying data. ```{r} x <- c(1, 2, 3) y <- x ``` If you modify `y`, R will first copy the values of `x` to a new position, then point `y` to the new location and only after the copy modify `y`. This allows `x` to retain the original values. ```{r} y[[3]] <- 4 y x ``` C++ does not have copy-on-write built into the language, however it has related concepts, copy-by-value and copy-by-reference. Copy-by-value works similarly to R, except that R only copies when something is changed, C++ _always_ copies. ```cpp int x = 42; int y = x; y = 0; // x is still == 42 ``` Copy-by-reference does the opposite, both `x` and `y` always point to the *same* underlying value. In C++ you specify a reference with `&`. ```cpp int x = 42; int &y = x; y = 0; // both x and y are now 0 ``` Copy-by-reference is a valuable technique, as it avoids the overhead of copying the data. However it can also lead to errors when internal functions change their inputs unexpectedly. Rcpp uses copy-by-reference by default (even if you pass a Rcpp vector class by value). This gives Rcpp functions completely different semantics from normal R functions. We can illustrate this by creating a Rcpp function that multiples its input vector by 2. 
```{Rcpp} #include "Rcpp.h" using namespace Rcpp; // [[Rcpp::export]] NumericVector times_two_rcpp(NumericVector x) { for (int i = 0; i < x.size(); ++i) { x[i] = x[i] * 2; } return x; } ``` If you do this with regular R functions, you will see the value of `y` is `x` * 2, but the value of `x` is unchanged. ```{r} x <- c(1, 2, 3) y <- x * 2 y x ``` However if we now call our `times_two_rcpp()` function we get the right output value, but now `x` is *also changed*. ```{r} z <- times_two_rcpp(x) z x ``` cpp11 strives to make its functions behave similarly to normal R functions, while preserving the speed of Rcpp when read only access is needed. Each of the r_vector classes in cpp11 has a normal *read only* version that uses copy-by-reference, and a *writable* version which uses copy-by-value. ```{cpp11} #include "cpp11/doubles.hpp" [[cpp11::register]] cpp11::doubles times_two_cpp11(cpp11::writable::doubles x) { for (int i = 0; i < x.size(); ++i) { x[i] = x[i] * 2; } return x; } ``` Using `cpp11::writable::doubles` first *copies* the input vector, so when we do the multiplication we do not modify the original data. ```{r} x <- c(1, 2, 3) z <- times_two_cpp11(x) z x ``` ## Improve safety Internally R is written in C, not C++. In general C and C++ work well together, a large part of C++'s success is due to its high interoperability with C code. However one area in which C and C++ are generally *not* interoperable is error handling. In C++ the most common way to handle errors is with [exceptions](https://isocpp.org/wiki/faq/exceptions). Exceptions provide a clean, safe way for objects to obtain and cleanup resources automatically even when errors occur. ### C safety The C language does not have support for exceptions, so error handling is done a variety of ways. These include error codes like [errno](https://en.cppreference.com/w/c/error/errno), conditional statements, and in the R codebase the [longjmp](http://www.cplusplus.com/reference/csetjmp/longjmp/) function. 
`longjmp`, which stands for 'long jump' is a function that allows you to transfer the control flow of a program to another location elsewhere in the program. R uses long jumps extensively in its error handling routines. If an R function is executing and an error occurs, a long jump is called which 'jumps' the control flow into the error handling code. Crucially long jumps are *incompatible* with C++ [destructors](https://isocpp.org/wiki/faq/dtors). If a long jump occurs the destructors of any active C++ objects are not run, and therefore any resources (such as memory, file handles, etc.) managed by those objects will cause a [resource leak](https://en.wikipedia.org/wiki/Resource_leak). For example, the following unsafe code would leak the memory allocated in the C++ `std::vector` `x` when the R API function `Rf_allocVector()` fails (since you can't create a vector of `-1` size). ```cpp std::vector x({1., 2., 3.}); SEXP y = PROTECT(Rf_allocVector(REALSXP, -1)); ``` cpp11 provides two mechanisms to make interfacing with Rs C API and C++ code safer. `cpp11::unwind_protect()` takes a functional object (a C++11 lamdba function or `std::function`) and converts any C long jumps encountered to C++ exceptions. Now instead of a C long jump happening when the `Rf_allocVector()` call fails, a C++ exception occurs, which *does* trigger the `std::vector` destructor, so that memory is automatically released. ```cpp std::vector x({1., 2., 3.}); SEXP y; unwind_protect([]() { y = Rf_allocVector(REALSXP, -1); }) ``` `cpp11::safe()` is a more concise way to wrap a particular R API function with `unwind_protect()`. ```cpp std::vector x({1., 2., 3.}); SEXP y = PROTECT(safe[Rf_allocVector](REALSXP, -1)); ``` Again using `cpp11::safe()` converts the C long jump to a C++ exception, so the memory is automatically released. 
cpp11 uses these mechanisms extensively internally when calling the R C API, which makes cpp11 much safer against resource leaks than using Rcpp or calling R's C API by hand. ### C++ safety In the inverse of C safety we also need to ensure that C++ exceptions do not reach the C call stack, as they will terminate R if that occurs. Like Rcpp, cpp11 automatically generates `try / catch` guards around registered functions to prevent this and also converts C++ exceptions into normal R errors. This is done without developer facing code changes. With both C and C++ sides of the coin covered we can safely use R's C API and C++ code together with C++ objects without leaking resources. ## Altrep support [ALTREP](https://svn.r-project.org/R/branches/ALTREP/ALTREP.html) which stands for **ALT**ernative **REP**resentations is a feature introduced in R 3.5. ALTREP allows R internals and package authors to define alternative ways of representing data to R. One example of the use of altrep is the `:` operator. Prior to R 3.5 `:` generated a full vector for the entire sequence. e.g. `1:1000` would require 1000 individual values. As of R 3.5 this sequence is instead represented by an ALTREP vector, so *none* of the values actually exist in memory. Instead each time R accesses a particular value in the sequence that value is computed on-the-fly. This saves memory and execution time, and allows users to use sequences which would otherwise be too big to fit in memory. ```{r, R.options = list(max.print = 20)} 1:1e9 ``` Because Rcpp predates the introduction of ALTREP, it does not support the interfaces needed to access ALTREP objects. This means the objects must be converted to normal R objects as soon as they are used by Rcpp. ```{Rcpp} #include "Rcpp.h" // [[Rcpp::export]] Rcpp::IntegerVector identity_rcpp(Rcpp::IntegerVector x) { return x; } ``` ```{r} x <- identity_rcpp(1:100000) lobstr::obj_size(x) ``` Whereas cpp11 objects preserve the ALTREP object.
```{cpp11} #include "cpp11/integers.hpp" [[cpp11::register]] cpp11::integers identity_cpp11(cpp11::integers x) { return x; } ``` ```{r} y <- identity_cpp11(1:100000) lobstr::obj_size(y) ``` ### Altrep benchmarks In these benchmarks note that Rcpp allocates memory for the ALTREP vectors. This is because Rcpp implicitly converts them into normal R vectors. cpp11 retains them as ALTREP vectors, so no additional memory is needed. `foreach` and `accumulate` both use iterators that take advantage of `REAL_GET_REGION` to buffer queries. This makes them faster than naive C-style for loops with ALTREP vectors. The for2 case shows an optimization you can use if you know at compile-time that you won't be dealing with ALTREP vectors. By specifying `false` to the second argument (`is_altrep`), you can disable the ALTREP support. This causes the ALTREP conditional code to be compiled out resulting in loop unrolling (and speeds) identical to that generated by Rcpp. ```{r, message = FALSE, results = 'asis', eval = should_run_benchmarks()} library(cpp11test) cases <- expand.grid( len = 3e6, vector = c("normal", "altrep"), method = c("for", "foreach", "accumulate"), pkg = c("cpp11", "rcpp"), stringsAsFactors = FALSE ) # Add special case cases <- rbind(list(len = 3e6, vector = "normal", method = "for2", pkg = "cpp11"), cases) b_sum <- bench::press( .grid = cases, { seq_real <- function(x) as.numeric(seq_len(x)) funs <- c("normal" = rnorm, "altrep" = seq_real) x <- funs[[vector]](len) fun <- match.fun(sprintf("%ssum_dbl_%s_", ifelse(pkg == "cpp11", "", paste0(pkg, "_")), method)) bench::mark( fun(x) ) } )[c("pkg", "method", "vector", "min", "median", "mem_alloc", "itr/sec", "n_gc")] saveRDS(b_sum, "sum.Rds", version = 2) ``` ```{r} knitr::kable(readRDS("sum.Rds")) ``` [cpp11test/src/sum.cpp](https://github.com/r-lib/cpp11/blob/main/cpp11test/src/sum.cpp) contains the code ran in these benchmarks. 
## UTF-8 everywhere R has complicated support for Unicode strings and non-ASCII code pages, whose behavior often differs substantially on different operating systems, particularly Windows. Correctly dealing with this is challenging and often feels like whack a mole. To combat this complexity cpp11 uses the [UTF-8 everywhere](http://utf8everywhere.org/) philosophy. This means that whenever text data is converted from R data structures to C++ data structures by cpp11 the data is translated into UTF-8. Conversely any text data coming from C++ code is assumed to be UTF-8 and marked as such for R. Doing this universally avoids many locale specific issues when dealing with Unicode text. Concretely cpp11 always uses `Rf_translateCharUTF8()` when obtaining `const char*` from `CHRSXP` objects and uses `Rf_mkCharCE(, CE_UTF8)` when creating new `CHRSXP` objects from `const char*` inputs. ## C++11 features C++11 provides a host of new features to the C++ language. cpp11 uses a number of these including - [move semantics](https://en.cppreference.com/w/cpp/language/move_constructor) - [type traits](https://en.cppreference.com/w/cpp/header/type_traits) - [initializer_list](https://en.cppreference.com/w/cpp/utility/initializer_list) - [variadic templates / parameter packs](https://en.cppreference.com/w/cpp/language/parameter_pack) - [user defined literals](https://en.cppreference.com/w/cpp/language/user_literal) - [user defined attributes](https://en.cppreference.com/w/cpp/language/attributes) ## Simpler implementation Rcpp is very ambitious, with a number of advanced features, including [modules](https://cran.r-project.org/package=Rcpp/vignettes/Rcpp-modules.pdf), [sugar](https://cran.r-project.org/package=Rcpp/vignettes/Rcpp-sugar.pdf) and extensive support for [attributes](https://CRAN.R-project.org/package=Rcpp/vignettes/Rcpp-attributes.pdf). While these are useful features, many R packages do not use one or any of these advanced features. 
In addition the code needed to support these features is complex and can be challenging to maintain. cpp11 takes a more limited scope, providing only the set of r_vector wrappers for R vector types, coercion methods to and from C++ and the limited attributes necessary to support use in R packages. ```{r, eval = FALSE, include = FALSE} # count lines for Rcpp headers (excluding comments) # brew install cloc git clone https://github.com/RcppCore/Rcpp.git cd Rcpp git checkout 1.0.4 cloc inst/include # count lines for Rcpp headers without generated code cloc --fullpath --not-match-f '.*generated.*' inst/include # count lines for cpp11 headers git clone https://github.com/r-lib/cpp11.git cd cpp11 cloc inst/include # get primary authors of Rcpp git ls-files -- inst/include | while read f; do git blame -w --line-porcelain -- "$f" | grep -I '^author '; done | sort -f | uniq -ic | sort -nr ``` This limited scope allows the implementation to be much simpler, the headers in Rcpp 1.0.4 have 74,658 lines of code (excluding blank or commented lines) in 379 files. Some headers in Rcpp are automatically generated, removing these still gives you 25,249 lines of code in 357 files. In contrast the headers in cpp11 contain only 1,734 lines of code in 19 files. This reduction in complexity should make cpp11 an easier project to maintain and ensure correctness, particularly around interactions with the R garbage collector. ## Compilation speed Rcpp always bundles all of its headers together, which causes slow compilation times and high peak memory usage when compiling. The headers in cpp11 are more easily decoupled, so you only can include only the particular headers you actually use in a source file. This can significantly improve the compilation speed and memory usage to compile your package. Here are some real examples of the reduction in compile time and peak memory usage after converting packages to cpp11. 
```{r, eval = FALSE, include = FALSE} # brew install gtime # CC=gcc-9 CXX=g++-9 CXX11=g++-9 gtime -f %M:%e R CMD INSTALL --libs-only --use-vanilla . ``` | package | Rcpp compile time | cpp11 compile time | Rcpp peak memory | cpp11 peak memory | Rcpp commit | cpp11 commit | | --- | --- | --- | --- | --- | --- | --- | | haven | 17.42s | 7.13s | 428MB | 204MB | [a3cf75a4][haven] | [978cb034][haven] | | readr | 124.13s | 81.08s | 969MB | 684MB | [ec0d8989][readr] | [aa89ff72][readr] | | roxygen2 | 17.34s | 4.24s | 371MB | 109MB | [6f081b75][roxygen2] | [e8e1e22d][roxygen2] | | tidyr | 14.25s | 3.34s | 363MB | 83MB | [3899ed51][tidyr] | [60f7c7d4][tidyr] | [haven]: https://github.com/tidyverse/haven/compare/a3cf75a4...978cb034 [readr]: https://github.com/tidyverse/readr/compare/ec0d8989...aa89ff72 [roxygen2]: https://github.com/r-lib/roxygen2/compare/6f081b75...e8e1e22d [tidyr]: https://github.com/tidyverse/tidyr/compare/3899ed51...60f7c7d4 ## Header only Rcpp has long been a *mostly* [header only](https://en.wikipedia.org/wiki/Header-only) library, however is not a *completely* header only library. There have been [cases](https://github.com/tidyverse/dplyr/issues/2308) when a package was first installed with version X of Rcpp, and then a newer version of Rcpp was later installed. Then when the original package X was loaded R would crash, because the [Application Binary Interface](https://en.wikipedia.org/wiki/Application_binary_interface) of Rcpp had changed between the two versions. Because cpp11 consists of exclusively headers this issue does not occur. ## Vendoring In the go community the concept of [vendoring](https://go.googlesource.com/proposal/+/master/design/25719-go15vendor.md) is widespread. Vendoring means that you copy the code for the dependencies into your project's source tree. This ensures the dependency code is fixed and stable until it is updated. Because cpp11 is fully [header only](#header-only) you can vendor the code in the same way. 
`cpp11::vendor_cpp11()` is provided to do this if you choose. Vendoring has advantages and drawbacks however. The advantage is that changes to the cpp11 project could never break your existing code. The drawbacks are both minor, your package size is now slightly larger, and major, you no longer get bugfixes and new features until you explicitly update cpp11. I think the majority of packages should use `LinkingTo: cpp11` and _not_ vendor the cpp11 dependency. However, vendoring can be appropriate for certain situations. ## Protection cpp11 uses a custom double linked list data structure to track objects it is managing. This structure is much more efficient for large numbers of objects than using `R_PreserveObject()` / `R_ReleaseObjects()` as is done in Rcpp. ```{r, message = FALSE, eval = should_run_benchmarks()} library(cpp11test) grid <- expand.grid(len = c(10 ^ (2:5), 2e5), pkg = c("cpp11", "rcpp"), stringsAsFactors = FALSE) b_release <- bench::press(.grid = grid, { fun = match.fun(sprintf("%s_release_", pkg)) bench::mark( fun(len), iterations = 1 ) } )[c("len", "pkg", "min")] saveRDS(b_release, "release.Rds", version = 2) ``` ```{r, echo = FALSE, dev = "svg", fig.ext = "svg", eval = capabilities("cairo")} b_release <- readRDS("release.Rds") library(ggplot2) ggplot(b_release, aes(x = len, y = min / len, color = pkg)) + geom_point() + geom_line() + bench::scale_y_bench_time(base = NULL) + scale_x_continuous(labels = scales::comma)+ labs( tite = "cpp11 uses constant time protection", x = "Number of protected objects", y = "Average time to release protection on one object" ) ``` This plot shows the average time to protect and release a given object is essentially constant for cpp11. Whereas it is linear or worse with the number of objects being tracked for Rcpp. ```{r, echo = FALSE} knitr::kable(b_release) ``` ## Growing vectors One major difference in Rcpp and cpp11 is how vectors are grown. 
Rcpp vectors have a `push_back()` method, but unlike `std::vector()` no additional space is reserved when pushing. This makes calling `push_back()` repeatably very expensive, as the entire vector has to be copied each call. In contrast `cpp11` vectors grow efficiently, reserving extra space. Because of this you can do ~10,000,000 vector appends with cpp11 in approximately the same amount of time that Rcpp does 10,000, as this benchmark demonstrates. ```{r, message = FALSE, eval = should_run_benchmarks()} grid <- expand.grid(len = 10 ^ (0:7), pkg = "cpp11", stringsAsFactors = FALSE) grid <- rbind( grid, expand.grid(len = 10 ^ (0:4), pkg = "rcpp", stringsAsFactors = FALSE) ) b_grow <- bench::press(.grid = grid, { fun = match.fun(sprintf("%sgrow_", ifelse(pkg == "cpp11", "", paste0(pkg, "_")))) bench::mark( fun(len) ) } )[c("len", "pkg", "min", "mem_alloc", "n_itr", "n_gc")] saveRDS(b_grow, "growth.Rds", version = 2) ``` ```{r, echo = FALSE, dev = "svg", fig.ext = "svg", eval = capabilities("cairo")} b_grow <- readRDS("growth.Rds") library(ggplot2) ggplot(b_grow, aes(x = len, y = min, color = pkg)) + geom_point() + geom_line() + bench::scale_y_bench_time() + scale_x_log10( breaks = scales::trans_breaks("log10", function(x) 10^x), labels = scales::trans_format("log10", scales::math_format(10^.x)) ) + coord_fixed() + theme(panel.grid.minor = element_blank()) + labs(title = "log-log plot of vector size vs construction time", x = NULL, y = NULL) ``` ```{r, echo = FALSE} knitr::kable(b_grow) ``` ## Conclusion Rcpp has been and will continue to be widely successful. cpp11 is a alternative implementation of C++ bindings to R that chooses different design trade-offs and features. Both packages can co-exist (even be used in the same package!) and continue to enrich the R community. 
cpp11/inst/doc/motivations.R0000644000175000017500000001267314151206455015565 0ustar nileshnilesh## ---- include = FALSE--------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>", eval = as.logical(Sys.getenv("CPP11_EVAL", "false")) ) print_cpp <- function(filename) { cat("```c++", readLines(filename), "```", sep = "\n") } library(cpp11) should_run_benchmarks <- function(x) { get("requireNamespace")("cpp11test", quietly = TRUE) && asNamespace("cpp11test")$should_run_benchmarks() } ## ----------------------------------------------------------------------------- x <- c(1, 2, 3) y <- x ## ----------------------------------------------------------------------------- y[[3]] <- 4 y x ## ----------------------------------------------------------------------------- x <- c(1, 2, 3) y <- x * 2 y x ## ----------------------------------------------------------------------------- z <- times_two_rcpp(x) z x ## ----------------------------------------------------------------------------- x <- c(1, 2, 3) z <- times_two_cpp11(x) z x ## ---- R.options = list(max.print = 20)---------------------------------------- 1:1e9 ## ----------------------------------------------------------------------------- x <- identity_rcpp(1:100000) lobstr::obj_size(x) ## ----------------------------------------------------------------------------- y <- identity_cpp11(1:100000) lobstr::obj_size(y) ## ---- message = FALSE, results = 'asis', eval = should_run_benchmarks()------- # library(cpp11test) # # cases <- expand.grid( # len = 3e6, # vector = c("normal", "altrep"), # method = c("for", "foreach", "accumulate"), # pkg = c("cpp11", "rcpp"), # stringsAsFactors = FALSE # ) # # # Add special case # cases <- rbind(list(len = 3e6, vector = "normal", method = "for2", pkg = "cpp11"), cases) # # b_sum <- bench::press( # .grid = cases, # { # seq_real <- function(x) as.numeric(seq_len(x)) # funs <- c("normal" = rnorm, "altrep" = seq_real) # x <- 
funs[[vector]](len) # fun <- match.fun(sprintf("%ssum_dbl_%s_", ifelse(pkg == "cpp11", "", paste0(pkg, "_")), method)) # bench::mark( # fun(x) # ) # } # )[c("pkg", "method", "vector", "min", "median", "mem_alloc", "itr/sec", "n_gc")] # # saveRDS(b_sum, "sum.Rds", version = 2) ## ----------------------------------------------------------------------------- knitr::kable(readRDS("sum.Rds")) ## ---- eval = FALSE, include = FALSE------------------------------------------- # # count lines for Rcpp headers (excluding comments) # # brew install cloc # git clone https://github.com/RcppCore/Rcpp.git # cd Rcpp # git checkout 1.0.4 # cloc inst/include # # # count lines for Rcpp headers without generated code # cloc --fullpath --not-match-f '.*generated.*' inst/include # # # count lines for cpp11 headers # git clone https://github.com/r-lib/cpp11.git # cd cpp11 # cloc inst/include # # # get primary authors of Rcpp # git ls-files -- inst/include | while read f; do git blame -w --line-porcelain -- "$f" | grep -I '^author '; done | sort -f | uniq -ic | sort -nr ## ---- eval = FALSE, include = FALSE------------------------------------------- # # brew install gtime # # CC=gcc-9 CXX=g++-9 CXX11=g++-9 # gtime -f %M:%e R CMD INSTALL --libs-only --use-vanilla . 
## ---- message = FALSE, eval = should_run_benchmarks()------------------------- # library(cpp11test) # grid <- expand.grid(len = c(10 ^ (2:5), 2e5), pkg = c("cpp11", "rcpp"), stringsAsFactors = FALSE) # b_release <- bench::press(.grid = grid, # { # fun = match.fun(sprintf("%s_release_", pkg)) # bench::mark( # fun(len), # iterations = 1 # ) # } # )[c("len", "pkg", "min")] # saveRDS(b_release, "release.Rds", version = 2) ## ---- echo = FALSE, dev = "svg", fig.ext = "svg", eval = capabilities("cairo")---- b_release <- readRDS("release.Rds") library(ggplot2) ggplot(b_release, aes(x = len, y = min / len, color = pkg)) + geom_point() + geom_line() + bench::scale_y_bench_time(base = NULL) + scale_x_continuous(labels = scales::comma)+ labs( tite = "cpp11 uses constant time protection", x = "Number of protected objects", y = "Average time to release protection on one object" ) ## ---- echo = FALSE------------------------------------------------------------ knitr::kable(b_release) ## ---- message = FALSE, eval = should_run_benchmarks()------------------------- # grid <- expand.grid(len = 10 ^ (0:7), pkg = "cpp11", stringsAsFactors = FALSE) # grid <- rbind( # grid, # expand.grid(len = 10 ^ (0:4), pkg = "rcpp", stringsAsFactors = FALSE) # ) # b_grow <- bench::press(.grid = grid, # { # fun = match.fun(sprintf("%sgrow_", ifelse(pkg == "cpp11", "", paste0(pkg, "_")))) # bench::mark( # fun(len) # ) # } # )[c("len", "pkg", "min", "mem_alloc", "n_itr", "n_gc")] # saveRDS(b_grow, "growth.Rds", version = 2) ## ---- echo = FALSE, dev = "svg", fig.ext = "svg", eval = capabilities("cairo")---- b_grow <- readRDS("growth.Rds") library(ggplot2) ggplot(b_grow, aes(x = len, y = min, color = pkg)) + geom_point() + geom_line() + bench::scale_y_bench_time() + scale_x_log10( breaks = scales::trans_breaks("log10", function(x) 10^x), labels = scales::trans_format("log10", scales::math_format(10^.x)) ) + coord_fixed() + theme(panel.grid.minor = element_blank()) + labs(title = "log-log plot of 
vector size vs construction time", x = NULL, y = NULL) ## ---- echo = FALSE------------------------------------------------------------ knitr::kable(b_grow) cpp11/inst/doc/FAQ.Rmd0000644000175000017500000001421314120423440014120 0ustar nileshnilesh--- title: "FAQ" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{FAQ} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) library(cpp11) ``` Below are some Frequently Asked Questions about cpp11. If you have a question that you think would fit well here please [open an issue](https://github.com/r-lib/cpp11/issues/new/choose). #### 1. What are the underlying types of cpp11 objects? | vector | element | | --- | --- | | cpp11::integers | int | | cpp11::doubles | double | | cpp11::logical | cpp11::r_bool | | cpp11::strings | cpp11::r_string | | cpp11::raws | uint8_t | | cpp11::list | SEXP | #### 2. How do I add elements to a named list? Use the `push_back()` method with the named literal syntax. The named literal syntax is defined in the `cpp11::literals` namespace. ```{cpp11} #include [[cpp11::register]] cpp11::list foo_push() { using namespace cpp11::literals; cpp11::writable::list x; x.push_back({"foo"_nm = 1}); return x; } ``` #### 3. Does cpp11 support default arguments? cpp11 does not support default arguments, while convenient they would require more complexity to support than is currently worthwhile. If you need default argument support you can use a wrapper function around your cpp11 registered function. A common convention is to name the internal function with a trailing `_`. ```{cpp11} #include [[cpp11::register]] double add_some_(double x, double amount) { return x + amount; } ``` ```{r} add_some <- function(x, amount = 1) { add_some_(x, amount) } add_some(1) add_some(1, amount = 5) ``` #### 4. How do I create a new empty list? Define a new writable list object. `cpp11::writable::list x;` #### 5. 
How do I retrieve (named) elements from a named vector/list? Use the `[]` accessor function. `x["foo"]` #### 6. How can I tell whether a vector is named? Use the `named()` method for vector classes. ```{cpp11} #include [[cpp11::register]] bool is_named(cpp11::strings x) { return x.named(); } ``` ```{r} is_named("foo") is_named(c(x = "foo")) ``` #### 7. How do I return a `cpp11::writable::logicals` object with only a `FALSE` value? You need to use [list initialization](https://en.cppreference.com/w/cpp/language/list_initialization) with `{}` to create the object. ```{cpp11} #include [[cpp11::register]] cpp11::writable::logicals my_false() { return {FALSE}; } [[cpp11::register]] cpp11::writable::logicals my_true() { return {TRUE}; } [[cpp11::register]] cpp11::writable::logicals my_both() { return {TRUE, FALSE, TRUE}; } ``` ```{r} my_false() my_true() my_both() ``` #### 8. How do I create a new empty environment? To do this you need to call the `base::new.env()` function from C++. This can be done by creating a `cpp11::function` object and then calling it to generate the new environment. ```{cpp11} #include [[cpp11::register]] cpp11::environment create_environment() { cpp11::function new_env(cpp11::package("base")["new.env"]); return new_env(); } ``` #### 9. How do I assign and retrieve values in an environment? What happens if I try to get a value that doesn't exist? Use `[]` to retrieve or assign values from an environment by name. If a value does not exist it will return `R_UnboundValue`. ```{cpp11} #include [[cpp11::register]] bool foo_exists(cpp11::environment x) { return x["foo"] != R_UnboundValue; } [[cpp11::register]] void set_foo(cpp11::environment x, double value) { x["foo"] = value; } ``` ```{r} x <- new.env() foo_exists(x) set_foo(x, 1) foo_exists(x) ``` #### 10. How can I create a `cpp11:raws` from a `std::string`? There is no built in way to do this. One method would be to `push_back()` each element of the string individually. 
```{cpp11} #include [[cpp11::register]] cpp11::raws push_raws() { std::string x("hi"); cpp11::writable::raws out; for (auto c : x) { out.push_back(c); } return out; } ``` ```{r} push_raws() ``` #### 11. How can I create a `std::string` from a `cpp11::writable::string`? Because C++ does not allow for two implicit cast, explicitly cast to `cpp11::r_string` first. ```{cpp11} #include #include [[cpp11::register]] std::string my_string() { cpp11::writable::strings x({"foo", "bar"}); std::string elt = cpp11::r_string(x[0]); return elt; } ``` #### 12. What are the types for C++ iterators? The iterators are `::iterator` classes contained inside the vector classes. For example the iterator for `cpp11::doubles` would be `cpp11::doubles::iterator` and the iterator for `cpp11::writable::doubles` would be `cpp11::writable::doubles::iterator`. #### 13. My code has `using namespace std`, why do I still have to include `std::` in the signatures of `[[cpp11::register]]` functions? The `using namespace std` directive will not be included in the generated code of the function signatures, so they still need to be fully qualified. However you will _not_ need to qualify the type names within those functions. The following won't compile ```{cpp11, eval = FALSE} #include #include using namespace std; [[cpp11::register]] string foobar() { return string("foo") + "-bar"; } ``` But this will compile and work as intended ```{cpp11} #include #include using namespace std; [[cpp11::register]] std::string foobar() { return string("foo") + "-bar"; } ``` #### 14. How do I modify a vector in place? In place modification breaks the normal semantics of R code. In general it should be avoided, which is why `cpp11::writable` classes always copy their data when constructed. However if you are _positive_ in-place modification is necessary for your use case you can use the move constructor to do this. 
```{cpp11} #include [[cpp11::register]] void add_one(cpp11::sexp x_sexp) { cpp11::writable::integers x(std::move(x_sexp.data())); for (auto&& value : x) { ++value; } } ``` ```{r} x <- c(1L, 2L, 3L, 4L) .Internal(inspect(x)) add_one(x) .Internal(inspect(x)) x ``` cpp11/inst/doc/converting.html0000644000175000017500000030155114151206373016125 0ustar nileshnilesh Converting from Rcpp

Converting from Rcpp

In many cases there is no need to convert a package from Rcpp. If the code is already written and you don’t have a very compelling need to use cpp11 I would recommend you continue to use Rcpp. However if you do feel like your project will benefit from using cpp11 this vignette will provide some guidance on doing the conversion.

It is also a place to highlight some of the largest differences between Rcpp and cpp11.

Class comparison table

Rcpp cpp11 (read-only) cpp11 (writable) cpp11 header
NumericVector doubles writable::doubles <cpp11/doubles.hpp>
IntegerVector integers writable::integers <cpp11/integers.hpp>
CharacterVector strings writable::strings <cpp11/strings.hpp>
RawVector raws writable::raws <cpp11/raws.hpp>
List list writable::list <cpp11/list.hpp>
RObject sexp <cpp11/sexp.hpp>
XPtr external_pointer <cpp11/external_pointer.hpp>
Environment environment <cpp11/environment.hpp>
Function function <cpp11/function.hpp>
Environment (namespace) package <cpp11/function.hpp>
wrap as_sexp <cpp11/as.hpp>
as as_cpp <cpp11/as.hpp>
stop stop <cpp11/protect.hpp>
checkUserInterrupt check_user_interrupt <cpp11/protect.hpp>

Incomplete list of Rcpp features not included in cpp11

  • None of Modules
  • None of Sugar
  • Some parts of Attributes
    • No dependencies
    • No random number generator restoration
    • No support for roxygen2 comments
    • No interfaces

Read-only vs writable vectors

The largest difference between cpp11 and Rcpp classes is that Rcpp classes modify their data in place, whereas cpp11 classes require copying the data to a writable class for modification.

The default classes, e.g. cpp11::doubles are read-only classes that do not permit modification. If you want to modify the data you need to use the classes in the cpp11::writable namespace, e.g. cpp11::writable::doubles.

In addition use the writable variants if you need to create a new R vector entirely in C++.

Fewer implicit conversions

Rcpp also allows very flexible implicit conversions, e.g. if you pass a REALSXP to a function that takes an Rcpp::IntegerVector() it is implicitly converted to an INTSXP. These conversions are nice for usability, but require (implicit) duplication of the data, with the associated runtime costs.

cpp11 throws an error in these cases. If you want the implicit coercions you can add a call to as.integer() or as.double() as appropriate from R when you call the function.

Calling R functions from C++

Calling R functions from C++ is similar to using Rcpp.

Rcpp::Function as_tibble("as_tibble", Rcpp::Environment::namespace_env("tibble"));
as_tibble(x, Rcpp::Named(".rows", num_rows), Rcpp::Named(".name_repair", name_repair));
using namespace cpp11::literals; // so we can use ""_nm syntax

auto as_tibble = cpp11::package("tibble")["as_tibble"];
as_tibble(x, ".rows"_nm = num_rows, ".name_repair"_nm = name_repair);

Appending behavior

One major difference in Rcpp and cpp11 is how vectors are grown. Rcpp vectors have a push_back() method, but unlike std::vector() no additional space is reserved when pushing. This makes calling push_back() repeatedly very expensive, as the entire vector has to be copied on each call.

In contrast cpp11 vectors grow efficiently, reserving extra space. Because of this you can do ~10,000,000 vector appends with cpp11 in approximately the same amount of time that Rcpp does 10,000, as this benchmark demonstrates.

library(cpp11test)
grid <- expand.grid(len = 10 ^ (0:7), pkg = "cpp11", stringsAsFactors = FALSE)
grid <- rbind(
  grid,
  expand.grid(len = 10 ^ (0:4), pkg = "rcpp", stringsAsFactors = FALSE)
)
b_grow <- bench::press(.grid = grid,
  {
    fun = match.fun(sprintf("%sgrow_", ifelse(pkg == "cpp11", "", paste0(pkg, "_"))))
    bench::mark(
      fun(len)
    )
  }
)[c("len", "pkg", "min", "mem_alloc", "n_itr", "n_gc")]
saveRDS(b_grow, "growth.Rds", version = 2)

len pkg min mem_alloc n_itr n_gc
1e+00 cpp11 3.3µs 0B 10000 0
1e+01 cpp11 6.05µs 0B 9999 1
1e+02 cpp11 8.49µs 1.89KB 10000 0
1e+03 cpp11 14.18µs 16.03KB 9999 1
1e+04 cpp11 63.77µs 256.22KB 3477 2
1e+05 cpp11 443.32µs 2MB 404 5
1e+06 cpp11 3.99ms 16MB 70 3
1e+07 cpp11 105.51ms 256MB 1 5
1e+00 rcpp 2.64µs 0B 10000 0
1e+01 rcpp 3.13µs 0B 9999 1
1e+02 rcpp 13.87µs 42.33KB 9997 3
1e+03 rcpp 440.77µs 3.86MB 319 1
1e+04 rcpp 54.13ms 381.96MB 2 2

Random Number behavior

Rcpp unconditionally includes calls to GetRNGstate() and PutRNGstate() before each wrapped function. This ensures that if any C++ code calls the R API functions unif_rand(), norm_rand(), exp_rand() or R_unif_index() the random seed state is set accordingly. cpp11 does not do this, so you must include the calls to GetRNGstate() and PutRNGstate() yourself if you use any of those functions in your C++ code. See R-exts 6.3 - Random number generation for details on these functions.

One convenient way to do this safely is to use a simple class:

class local_rng {
public:
  local_rng() {
    GetRNGstate();
  }

  ~local_rng(){
    PutRNGstate();
  }
};

void foo() {
  local_rng rng_state;
  /* my code using the RNG */
}

Mechanics of converting a package from Rcpp

  1. Add cpp11 to LinkingTo
  2. Add C++11 to SystemRequirements
  3. Convert all instances of // [[Rcpp::export]] to [[cpp11::register]]
  4. Clean and recompile the package, e.g. pkgbuild::clean_dll() pkgload::load_all()
  5. Run tests devtools::test()
  6. Start converting function by function
    • Remember you can usually inter-convert between cpp11 and Rcpp classes by going through SEXP if needed.
    • Converting the code a bit at a time (and regularly running your tests) is the best way to do the conversion correctly and make progress.
    • Doing a separate commit after converting each file (or possibly each function) can make finding any regressions with git bisect much easier in the future.

Common issues when converting

STL includes

Rcpp.h includes a number of STL headers automatically, notably <string> and <vector>, however the cpp11 headers generally do not. If you have errors like

error: no type named ‘string’ in namespace ‘std’

You will need to include the appropriate STL header, in this case <string>.

R API includes

cpp11 conflicts with macros declared by some R headers unless the macros R_NO_REMAP and STRICT_R_HEADERS are defined. If you include cpp11/R.hpp before any R headers these macros will be defined appropriately, otherwise you may see errors like

R headers were included before cpp11 headers and at least one of R_NO_REMAP or STRICT_R_HEADERS was not defined.

Which indicates that you must either change your include order or add preprocessor definitions for R_NO_REMAP and STRICT_R_HEADERS. Note that transitive includes of R headers (for example, those included by Rcpp.h) can also introduce the conflicting macros.

Type aliases

If you use typedefs for cpp11 types or define custom types you will need to define them in a pkgname_types.hpp file so that cpp_register() can include it in the generated code.

cpp11::stop() and cpp11::warning() with std::string

cpp11::stop() and cpp11::warning() are thin wrappers around Rf_stop() and Rf_warning(). These are simple C functions with a printf() API, so they do not understand C++ objects like std::string. Therefore you need to call obj.c_str() when passing character data to them.

Logical vector construction

If you are constructing a length 1 logical vector you may need to explicitly use an r_bool() object in the initializer list rather than TRUE, FALSE or NA_LOGICAL. This issue only occurs with the clang compiler, not gcc. When constructing vectors with more than one element this is not an issue.

// bad
cpp11::writable::logicals({FALSE});

// good
cpp11::writable::logicals({r_bool(FALSE)});

// good
cpp11::writable::logicals({FALSE, NA_LOGICAL});
cpp11/inst/include/0000755000175000017500000000000014077027144013736 5ustar nileshnileshcpp11/inst/include/fmt/0000755000175000017500000000000014077027144014524 5ustar nileshnileshcpp11/inst/include/fmt/format-inl.h0000644000175000017500000031277014077027144016757 0ustar nileshnilesh// Formatting library for C++ - implementation // // Copyright (c) 2012 - 2016, Victor Zverovich // All rights reserved. // // For the license information refer to format.h. #ifndef FMT_FORMAT_INL_H_ #define FMT_FORMAT_INL_H_ #include #include #include // errno #include #include #include #include // std::memmove #include #include #ifndef FMT_STATIC_THOUSANDS_SEPARATOR # include #endif #ifdef _WIN32 # include // _isatty #endif #include "format.h" FMT_BEGIN_NAMESPACE namespace detail { FMT_FUNC void assert_fail(const char* file, int line, const char* message) { // Use unchecked std::fprintf to avoid triggering another assertion when // writing to stderr fails std::fprintf(stderr, "%s:%d: assertion failed: %s", file, line, message); // Chosen instead of std::abort to satisfy Clang in CUDA mode during device // code pass. std::terminate(); } #ifndef _MSC_VER # define FMT_SNPRINTF snprintf #else // _MSC_VER inline int fmt_snprintf(char* buffer, size_t size, const char* format, ...) { va_list args; va_start(args, format); int result = vsnprintf_s(buffer, size, _TRUNCATE, format, args); va_end(args); return result; } # define FMT_SNPRINTF fmt_snprintf #endif // _MSC_VER FMT_FUNC void format_error_code(detail::buffer& out, int error_code, string_view message) FMT_NOEXCEPT { // Report error code making sure that the output fits into // inline_buffer_size to avoid dynamic memory allocation and potential // bad_alloc. out.try_resize(0); static const char SEP[] = ": "; static const char ERROR_STR[] = "error "; // Subtract 2 to account for terminating null characters in SEP and ERROR_STR. 
size_t error_code_size = sizeof(SEP) + sizeof(ERROR_STR) - 2; auto abs_value = static_cast>(error_code); if (detail::is_negative(error_code)) { abs_value = 0 - abs_value; ++error_code_size; } error_code_size += detail::to_unsigned(detail::count_digits(abs_value)); auto it = buffer_appender(out); if (message.size() <= inline_buffer_size - error_code_size) format_to(it, FMT_STRING("{}{}"), message, SEP); format_to(it, FMT_STRING("{}{}"), ERROR_STR, error_code); FMT_ASSERT(out.size() <= inline_buffer_size, ""); } FMT_FUNC void report_error(format_func func, int error_code, const char* message) FMT_NOEXCEPT { memory_buffer full_message; func(full_message, error_code, message); // Don't use fwrite_fully because the latter may throw. if (std::fwrite(full_message.data(), full_message.size(), 1, stderr) > 0) std::fputc('\n', stderr); } // A wrapper around fwrite that throws on error. inline void fwrite_fully(const void* ptr, size_t size, size_t count, FILE* stream) { size_t written = std::fwrite(ptr, size, count, stream); if (written < count) FMT_THROW(system_error(errno, "cannot write to file")); } #ifndef FMT_STATIC_THOUSANDS_SEPARATOR template locale_ref::locale_ref(const Locale& loc) : locale_(&loc) { static_assert(std::is_same::value, ""); } template Locale locale_ref::get() const { static_assert(std::is_same::value, ""); return locale_ ? *static_cast(locale_) : std::locale(); } template FMT_FUNC auto thousands_sep_impl(locale_ref loc) -> thousands_sep_result { auto& facet = std::use_facet>(loc.get()); auto grouping = facet.grouping(); auto thousands_sep = grouping.empty() ? 
Char() : facet.thousands_sep(); return {std::move(grouping), thousands_sep}; } template FMT_FUNC Char decimal_point_impl(locale_ref loc) { return std::use_facet>(loc.get()) .decimal_point(); } #else template FMT_FUNC auto thousands_sep_impl(locale_ref) -> thousands_sep_result { return {"\03", FMT_STATIC_THOUSANDS_SEPARATOR}; } template FMT_FUNC Char decimal_point_impl(locale_ref) { return '.'; } #endif } // namespace detail #if !FMT_MSC_VER FMT_API FMT_FUNC format_error::~format_error() FMT_NOEXCEPT = default; #endif FMT_FUNC std::system_error vsystem_error(int error_code, string_view format_str, format_args args) { auto ec = std::error_code(error_code, std::generic_category()); return std::system_error(ec, vformat(format_str, args)); } namespace detail { template <> FMT_FUNC int count_digits<4>(detail::fallback_uintptr n) { // fallback_uintptr is always stored in little endian. int i = static_cast(sizeof(void*)) - 1; while (i > 0 && n.value[i] == 0) --i; auto char_digits = std::numeric_limits::digits / 4; return i >= 0 ? i * char_digits + count_digits<4, unsigned>(n.value[i]) : 1; } #if __cplusplus < 201703L template constexpr const char basic_data::digits[][2]; template constexpr const char basic_data::hex_digits[]; template constexpr const char basic_data::signs[]; template constexpr const unsigned basic_data::prefixes[]; template constexpr const char basic_data::left_padding_shifts[]; template constexpr const char basic_data::right_padding_shifts[]; #endif template struct bits { static FMT_CONSTEXPR_DECL const int value = static_cast(sizeof(T) * std::numeric_limits::digits); }; class fp; template fp normalize(fp value); // Lower (upper) boundary is a value half way between a floating-point value // and its predecessor (successor). Boundaries have the same exponent as the // value so only significands are stored. struct boundaries { uint64_t lower; uint64_t upper; }; // A handmade floating-point number f * pow(2, e). 
class fp { private: using significand_type = uint64_t; template using is_supported_float = bool_constant; public: significand_type f; int e; // All sizes are in bits. // Subtract 1 to account for an implicit most significant bit in the // normalized form. static FMT_CONSTEXPR_DECL const int double_significand_size = std::numeric_limits::digits - 1; static FMT_CONSTEXPR_DECL const uint64_t implicit_bit = 1ULL << double_significand_size; static FMT_CONSTEXPR_DECL const int significand_size = bits::value; fp() : f(0), e(0) {} fp(uint64_t f_val, int e_val) : f(f_val), e(e_val) {} // Constructs fp from an IEEE754 double. It is a template to prevent compile // errors on platforms where double is not IEEE754. template explicit fp(Double d) { assign(d); } // Assigns d to this and return true iff predecessor is closer than successor. template ::value)> bool assign(Float d) { // Assume float is in the format [sign][exponent][significand]. using limits = std::numeric_limits; const int float_significand_size = limits::digits - 1; const int exponent_size = bits::value - float_significand_size - 1; // -1 for sign const uint64_t float_implicit_bit = 1ULL << float_significand_size; const uint64_t significand_mask = float_implicit_bit - 1; const uint64_t exponent_mask = (~0ULL >> 1) & ~significand_mask; const int exponent_bias = (1 << exponent_size) - limits::max_exponent - 1; constexpr bool is_double = sizeof(Float) == sizeof(uint64_t); auto u = bit_cast>(d); f = u & significand_mask; int biased_e = static_cast((u & exponent_mask) >> float_significand_size); // Predecessor is closer if d is a normalized power of 2 (f == 0) other than // the smallest normalized number (biased_e > 1). bool is_predecessor_closer = f == 0 && biased_e > 1; if (biased_e != 0) f += float_implicit_bit; else biased_e = 1; // Subnormals use biased exponent 1 (min exponent). 
e = biased_e - exponent_bias - float_significand_size; return is_predecessor_closer; } template ::value)> bool assign(Float) { *this = fp(); return false; } }; // Normalizes the value converted from double and multiplied by (1 << SHIFT). template fp normalize(fp value) { // Handle subnormals. const auto shifted_implicit_bit = fp::implicit_bit << SHIFT; while ((value.f & shifted_implicit_bit) == 0) { value.f <<= 1; --value.e; } // Subtract 1 to account for hidden bit. const auto offset = fp::significand_size - fp::double_significand_size - SHIFT - 1; value.f <<= offset; value.e -= offset; return value; } inline bool operator==(fp x, fp y) { return x.f == y.f && x.e == y.e; } // Computes lhs * rhs / pow(2, 64) rounded to nearest with half-up tie breaking. inline uint64_t multiply(uint64_t lhs, uint64_t rhs) { #if FMT_USE_INT128 auto product = static_cast<__uint128_t>(lhs) * rhs; auto f = static_cast(product >> 64); return (static_cast(product) & (1ULL << 63)) != 0 ? f + 1 : f; #else // Multiply 32-bit parts of significands. uint64_t mask = (1ULL << 32) - 1; uint64_t a = lhs >> 32, b = lhs & mask; uint64_t c = rhs >> 32, d = rhs & mask; uint64_t ac = a * c, bc = b * c, ad = a * d, bd = b * d; // Compute mid 64-bit of result and round. uint64_t mid = (bd >> 32) + (ad & mask) + (bc & mask) + (1U << 31); return ac + (ad >> 32) + (bc >> 32) + (mid >> 32); #endif } inline fp operator*(fp x, fp y) { return {multiply(x.f, y.f), x.e + y.e + 64}; } // Returns a cached power of 10 `c_k = c_k.f * pow(2, c_k.e)` such that its // (binary) exponent satisfies `min_exponent <= c_k.e <= min_exponent + 28`. inline fp get_cached_power(int min_exponent, int& pow10_exponent) { // Normalized 64-bit significands of pow(10, k), for k = -348, -340, ..., 340. // These are generated by support/compute-powers.py. 
static constexpr const uint64_t pow10_significands[] = { 0xfa8fd5a0081c0288, 0xbaaee17fa23ebf76, 0x8b16fb203055ac76, 0xcf42894a5dce35ea, 0x9a6bb0aa55653b2d, 0xe61acf033d1a45df, 0xab70fe17c79ac6ca, 0xff77b1fcbebcdc4f, 0xbe5691ef416bd60c, 0x8dd01fad907ffc3c, 0xd3515c2831559a83, 0x9d71ac8fada6c9b5, 0xea9c227723ee8bcb, 0xaecc49914078536d, 0x823c12795db6ce57, 0xc21094364dfb5637, 0x9096ea6f3848984f, 0xd77485cb25823ac7, 0xa086cfcd97bf97f4, 0xef340a98172aace5, 0xb23867fb2a35b28e, 0x84c8d4dfd2c63f3b, 0xc5dd44271ad3cdba, 0x936b9fcebb25c996, 0xdbac6c247d62a584, 0xa3ab66580d5fdaf6, 0xf3e2f893dec3f126, 0xb5b5ada8aaff80b8, 0x87625f056c7c4a8b, 0xc9bcff6034c13053, 0x964e858c91ba2655, 0xdff9772470297ebd, 0xa6dfbd9fb8e5b88f, 0xf8a95fcf88747d94, 0xb94470938fa89bcf, 0x8a08f0f8bf0f156b, 0xcdb02555653131b6, 0x993fe2c6d07b7fac, 0xe45c10c42a2b3b06, 0xaa242499697392d3, 0xfd87b5f28300ca0e, 0xbce5086492111aeb, 0x8cbccc096f5088cc, 0xd1b71758e219652c, 0x9c40000000000000, 0xe8d4a51000000000, 0xad78ebc5ac620000, 0x813f3978f8940984, 0xc097ce7bc90715b3, 0x8f7e32ce7bea5c70, 0xd5d238a4abe98068, 0x9f4f2726179a2245, 0xed63a231d4c4fb27, 0xb0de65388cc8ada8, 0x83c7088e1aab65db, 0xc45d1df942711d9a, 0x924d692ca61be758, 0xda01ee641a708dea, 0xa26da3999aef774a, 0xf209787bb47d6b85, 0xb454e4a179dd1877, 0x865b86925b9bc5c2, 0xc83553c5c8965d3d, 0x952ab45cfa97a0b3, 0xde469fbd99a05fe3, 0xa59bc234db398c25, 0xf6c69a72a3989f5c, 0xb7dcbf5354e9bece, 0x88fcf317f22241e2, 0xcc20ce9bd35c78a5, 0x98165af37b2153df, 0xe2a0b5dc971f303a, 0xa8d9d1535ce3b396, 0xfb9b7cd9a4a7443c, 0xbb764c4ca7a44410, 0x8bab8eefb6409c1a, 0xd01fef10a657842c, 0x9b10a4e5e9913129, 0xe7109bfba19c0c9d, 0xac2820d9623bf429, 0x80444b5e7aa7cf85, 0xbf21e44003acdd2d, 0x8e679c2f5e44ff8f, 0xd433179d9c8cb841, 0x9e19db92b4e31ba9, 0xeb96bf6ebadf77d9, 0xaf87023b9bf0ee6b, }; // Binary exponents of pow(10, k), for k = -348, -340, ..., 340, corresponding // to significands above. 
static constexpr const int16_t pow10_exponents[] = { -1220, -1193, -1166, -1140, -1113, -1087, -1060, -1034, -1007, -980, -954, -927, -901, -874, -847, -821, -794, -768, -741, -715, -688, -661, -635, -608, -582, -555, -529, -502, -475, -449, -422, -396, -369, -343, -316, -289, -263, -236, -210, -183, -157, -130, -103, -77, -50, -24, 3, 30, 56, 83, 109, 136, 162, 189, 216, 242, 269, 295, 322, 348, 375, 402, 428, 455, 481, 508, 534, 561, 588, 614, 641, 667, 694, 720, 747, 774, 800, 827, 853, 880, 907, 933, 960, 986, 1013, 1039, 1066}; const int shift = 32; const auto significand = static_cast(data::log10_2_significand); int index = static_cast( ((min_exponent + fp::significand_size - 1) * (significand >> shift) + ((int64_t(1) << shift) - 1)) // ceil >> 32 // arithmetic shift ); // Decimal exponent of the first (smallest) cached power of 10. const int first_dec_exp = -348; // Difference between 2 consecutive decimal exponents in cached powers of 10. const int dec_exp_step = 8; index = (index - first_dec_exp - 1) / dec_exp_step + 1; pow10_exponent = first_dec_exp + index * dec_exp_step; return {pow10_significands[index], pow10_exponents[index]}; } // A simple accumulator to hold the sums of terms in bigint::square if uint128_t // is not available. struct accumulator { uint64_t lower; uint64_t upper; accumulator() : lower(0), upper(0) {} explicit operator uint32_t() const { return static_cast(lower); } void operator+=(uint64_t n) { lower += n; if (lower < n) ++upper; } void operator>>=(int shift) { FMT_ASSERT(shift == 32, ""); (void)shift; lower = (upper << 32) | (lower >> 32); upper >>= 32; } }; class bigint { private: // A bigint is stored as an array of bigits (big digits), with bigit at index // 0 being the least significant one. 
using bigit = uint32_t; using double_bigit = uint64_t; enum { bigits_capacity = 32 }; basic_memory_buffer bigits_; int exp_; bigit operator[](int index) const { return bigits_[to_unsigned(index)]; } bigit& operator[](int index) { return bigits_[to_unsigned(index)]; } static FMT_CONSTEXPR_DECL const int bigit_bits = bits::value; friend struct formatter; void subtract_bigits(int index, bigit other, bigit& borrow) { auto result = static_cast((*this)[index]) - other - borrow; (*this)[index] = static_cast(result); borrow = static_cast(result >> (bigit_bits * 2 - 1)); } void remove_leading_zeros() { int num_bigits = static_cast(bigits_.size()) - 1; while (num_bigits > 0 && (*this)[num_bigits] == 0) --num_bigits; bigits_.resize(to_unsigned(num_bigits + 1)); } // Computes *this -= other assuming aligned bigints and *this >= other. void subtract_aligned(const bigint& other) { FMT_ASSERT(other.exp_ >= exp_, "unaligned bigints"); FMT_ASSERT(compare(*this, other) >= 0, ""); bigit borrow = 0; int i = other.exp_ - exp_; for (size_t j = 0, n = other.bigits_.size(); j != n; ++i, ++j) subtract_bigits(i, other.bigits_[j], borrow); while (borrow > 0) subtract_bigits(i, 0, borrow); remove_leading_zeros(); } void multiply(uint32_t value) { const double_bigit wide_value = value; bigit carry = 0; for (size_t i = 0, n = bigits_.size(); i < n; ++i) { double_bigit result = bigits_[i] * wide_value + carry; bigits_[i] = static_cast(result); carry = static_cast(result >> bigit_bits); } if (carry != 0) bigits_.push_back(carry); } void multiply(uint64_t value) { const bigit mask = ~bigit(0); const double_bigit lower = value & mask; const double_bigit upper = value >> bigit_bits; double_bigit carry = 0; for (size_t i = 0, n = bigits_.size(); i < n; ++i) { double_bigit result = bigits_[i] * lower + (carry & mask); carry = bigits_[i] * upper + (result >> bigit_bits) + (carry >> bigit_bits); bigits_[i] = static_cast(result); } while (carry != 0) { bigits_.push_back(carry & mask); carry >>= 
bigit_bits; } } public: bigint() : exp_(0) {} explicit bigint(uint64_t n) { assign(n); } ~bigint() { FMT_ASSERT(bigits_.capacity() <= bigits_capacity, ""); } bigint(const bigint&) = delete; void operator=(const bigint&) = delete; void assign(const bigint& other) { auto size = other.bigits_.size(); bigits_.resize(size); auto data = other.bigits_.data(); std::copy(data, data + size, make_checked(bigits_.data(), size)); exp_ = other.exp_; } void assign(uint64_t n) { size_t num_bigits = 0; do { bigits_[num_bigits++] = n & ~bigit(0); n >>= bigit_bits; } while (n != 0); bigits_.resize(num_bigits); exp_ = 0; } int num_bigits() const { return static_cast(bigits_.size()) + exp_; } FMT_NOINLINE bigint& operator<<=(int shift) { FMT_ASSERT(shift >= 0, ""); exp_ += shift / bigit_bits; shift %= bigit_bits; if (shift == 0) return *this; bigit carry = 0; for (size_t i = 0, n = bigits_.size(); i < n; ++i) { bigit c = bigits_[i] >> (bigit_bits - shift); bigits_[i] = (bigits_[i] << shift) + carry; carry = c; } if (carry != 0) bigits_.push_back(carry); return *this; } template bigint& operator*=(Int value) { FMT_ASSERT(value > 0, ""); multiply(uint32_or_64_or_128_t(value)); return *this; } friend int compare(const bigint& lhs, const bigint& rhs) { int num_lhs_bigits = lhs.num_bigits(), num_rhs_bigits = rhs.num_bigits(); if (num_lhs_bigits != num_rhs_bigits) return num_lhs_bigits > num_rhs_bigits ? 1 : -1; int i = static_cast(lhs.bigits_.size()) - 1; int j = static_cast(rhs.bigits_.size()) - 1; int end = i - j; if (end < 0) end = 0; for (; i >= end; --i, --j) { bigit lhs_bigit = lhs[i], rhs_bigit = rhs[j]; if (lhs_bigit != rhs_bigit) return lhs_bigit > rhs_bigit ? 1 : -1; } if (i != j) return i > j ? 1 : -1; return 0; } // Returns compare(lhs1 + lhs2, rhs). 
friend int add_compare(const bigint& lhs1, const bigint& lhs2, const bigint& rhs) { int max_lhs_bigits = (std::max)(lhs1.num_bigits(), lhs2.num_bigits()); int num_rhs_bigits = rhs.num_bigits(); if (max_lhs_bigits + 1 < num_rhs_bigits) return -1; if (max_lhs_bigits > num_rhs_bigits) return 1; auto get_bigit = [](const bigint& n, int i) -> bigit { return i >= n.exp_ && i < n.num_bigits() ? n[i - n.exp_] : 0; }; double_bigit borrow = 0; int min_exp = (std::min)((std::min)(lhs1.exp_, lhs2.exp_), rhs.exp_); for (int i = num_rhs_bigits - 1; i >= min_exp; --i) { double_bigit sum = static_cast(get_bigit(lhs1, i)) + get_bigit(lhs2, i); bigit rhs_bigit = get_bigit(rhs, i); if (sum > rhs_bigit + borrow) return 1; borrow = rhs_bigit + borrow - sum; if (borrow > 1) return -1; borrow <<= bigit_bits; } return borrow != 0 ? -1 : 0; } // Assigns pow(10, exp) to this bigint. void assign_pow10(int exp) { FMT_ASSERT(exp >= 0, ""); if (exp == 0) return assign(1); // Find the top bit. int bitmask = 1; while (exp >= bitmask) bitmask <<= 1; bitmask >>= 1; // pow(10, exp) = pow(5, exp) * pow(2, exp). First compute pow(5, exp) by // repeated squaring and multiplication. assign(5); bitmask >>= 1; while (bitmask != 0) { square(); if ((exp & bitmask) != 0) *this *= 5; bitmask >>= 1; } *this <<= exp; // Multiply by pow(2, exp) by shifting. } void square() { int num_bigits = static_cast(bigits_.size()); int num_result_bigits = 2 * num_bigits; basic_memory_buffer n(std::move(bigits_)); bigits_.resize(to_unsigned(num_result_bigits)); using accumulator_t = conditional_t; auto sum = accumulator_t(); for (int bigit_index = 0; bigit_index < num_bigits; ++bigit_index) { // Compute bigit at position bigit_index of the result by adding // cross-product terms n[i] * n[j] such that i + j == bigit_index. for (int i = 0, j = bigit_index; j >= 0; ++i, --j) { // Most terms are multiplied twice which can be optimized in the future. 
sum += static_cast(n[i]) * n[j]; } (*this)[bigit_index] = static_cast(sum); sum >>= bits::value; // Compute the carry. } // Do the same for the top half. for (int bigit_index = num_bigits; bigit_index < num_result_bigits; ++bigit_index) { for (int j = num_bigits - 1, i = bigit_index - j; i < num_bigits;) sum += static_cast(n[i++]) * n[j--]; (*this)[bigit_index] = static_cast(sum); sum >>= bits::value; } --num_result_bigits; remove_leading_zeros(); exp_ *= 2; } // If this bigint has a bigger exponent than other, adds trailing zero to make // exponents equal. This simplifies some operations such as subtraction. void align(const bigint& other) { int exp_difference = exp_ - other.exp_; if (exp_difference <= 0) return; int num_bigits = static_cast(bigits_.size()); bigits_.resize(to_unsigned(num_bigits + exp_difference)); for (int i = num_bigits - 1, j = i + exp_difference; i >= 0; --i, --j) bigits_[j] = bigits_[i]; std::uninitialized_fill_n(bigits_.data(), exp_difference, 0); exp_ -= exp_difference; } // Divides this bignum by divisor, assigning the remainder to this and // returning the quotient. int divmod_assign(const bigint& divisor) { FMT_ASSERT(this != &divisor, ""); if (compare(*this, divisor) < 0) return 0; FMT_ASSERT(divisor.bigits_[divisor.bigits_.size() - 1u] != 0, ""); align(divisor); int quotient = 0; do { subtract_aligned(divisor); ++quotient; } while (compare(*this, divisor) >= 0); return quotient; } }; enum class round_direction { unknown, up, down }; // Given the divisor (normally a power of 10), the remainder = v % divisor for // some number v and the error, returns whether v should be rounded up, down, or // whether the rounding direction can't be determined due to error. // error should be less than divisor / 2. inline round_direction get_round_direction(uint64_t divisor, uint64_t remainder, uint64_t error) { FMT_ASSERT(remainder < divisor, ""); // divisor - remainder won't overflow. FMT_ASSERT(error < divisor, ""); // divisor - error won't overflow. 
FMT_ASSERT(error < divisor - error, ""); // error * 2 won't overflow. // Round down if (remainder + error) * 2 <= divisor. if (remainder <= divisor - remainder && error * 2 <= divisor - remainder * 2) return round_direction::down; // Round up if (remainder - error) * 2 >= divisor. if (remainder >= error && remainder - error >= divisor - (remainder - error)) { return round_direction::up; } return round_direction::unknown; } namespace digits { enum result { more, // Generate more digits. done, // Done generating digits. error // Digit generation cancelled due to an error. }; } inline uint64_t power_of_10_64(int exp) { static constexpr const uint64_t data[] = {1, FMT_POWERS_OF_10(1), FMT_POWERS_OF_10(1000000000ULL), 10000000000000000000ULL}; return data[exp]; } // Generates output using the Grisu digit-gen algorithm. // error: the size of the region (lower, upper) outside of which numbers // definitely do not round to value (Delta in Grisu3). template FMT_INLINE digits::result grisu_gen_digits(fp value, uint64_t error, int& exp, Handler& handler) { const fp one(1ULL << -value.e, value.e); // The integral part of scaled value (p1 in Grisu) = value / one. It cannot be // zero because it contains a product of two 64-bit numbers with MSB set (due // to normalization) - 1, shifted right by at most 60 bits. auto integral = static_cast(value.f >> -one.e); FMT_ASSERT(integral != 0, ""); FMT_ASSERT(integral == value.f >> -one.e, ""); // The fractional part of scaled value (p2 in Grisu) c = value % one. uint64_t fractional = value.f & (one.f - 1); exp = count_digits(integral); // kappa in Grisu. // Divide by 10 to prevent overflow. auto result = handler.on_start(power_of_10_64(exp - 1) << -one.e, value.f / 10, error * 10, exp); if (result != digits::more) return result; // Generate digits for the integral part. This can produce up to 10 digits. 
do { uint32_t digit = 0; auto divmod_integral = [&](uint32_t divisor) { digit = integral / divisor; integral %= divisor; }; // This optimization by Milo Yip reduces the number of integer divisions by // one per iteration. switch (exp) { case 10: divmod_integral(1000000000); break; case 9: divmod_integral(100000000); break; case 8: divmod_integral(10000000); break; case 7: divmod_integral(1000000); break; case 6: divmod_integral(100000); break; case 5: divmod_integral(10000); break; case 4: divmod_integral(1000); break; case 3: divmod_integral(100); break; case 2: divmod_integral(10); break; case 1: digit = integral; integral = 0; break; default: FMT_ASSERT(false, "invalid number of digits"); } --exp; auto remainder = (static_cast(integral) << -one.e) + fractional; result = handler.on_digit(static_cast('0' + digit), power_of_10_64(exp) << -one.e, remainder, error, exp, true); if (result != digits::more) return result; } while (exp > 0); // Generate digits for the fractional part. for (;;) { fractional *= 10; error *= 10; char digit = static_cast('0' + (fractional >> -one.e)); fractional &= one.f - 1; --exp; result = handler.on_digit(digit, one.f, fractional, error, exp, false); if (result != digits::more) return result; } } // The fixed precision digit handler. struct fixed_handler { char* buf; int size; int precision; int exp10; bool fixed; digits::result on_start(uint64_t divisor, uint64_t remainder, uint64_t error, int& exp) { // Non-fixed formats require at least one digit and no precision adjustment. if (!fixed) return digits::more; // Adjust fixed precision by exponent because it is relative to decimal // point. precision += exp + exp10; // Check if precision is satisfied just by leading zeros, e.g. // format("{:.2f}", 0.001) gives "0.00" without generating any digits. 
if (precision > 0) return digits::more; if (precision < 0) return digits::done; auto dir = get_round_direction(divisor, remainder, error); if (dir == round_direction::unknown) return digits::error; buf[size++] = dir == round_direction::up ? '1' : '0'; return digits::done; } digits::result on_digit(char digit, uint64_t divisor, uint64_t remainder, uint64_t error, int, bool integral) { FMT_ASSERT(remainder < divisor, ""); buf[size++] = digit; if (!integral && error >= remainder) return digits::error; if (size < precision) return digits::more; if (!integral) { // Check if error * 2 < divisor with overflow prevention. // The check is not needed for the integral part because error = 1 // and divisor > (1 << 32) there. if (error >= divisor || error >= divisor - error) return digits::error; } else { FMT_ASSERT(error == 1 && divisor > 2, ""); } auto dir = get_round_direction(divisor, remainder, error); if (dir != round_direction::up) return dir == round_direction::down ? digits::done : digits::error; ++buf[size - 1]; for (int i = size - 1; i > 0 && buf[i] > '9'; --i) { buf[i] = '0'; ++buf[i - 1]; } if (buf[0] > '9') { buf[0] = '1'; if (fixed) buf[size++] = '0'; else ++exp10; } return digits::done; } }; // A 128-bit integer type used internally, struct uint128_wrapper { uint128_wrapper() = default; #if FMT_USE_INT128 uint128_t internal_; constexpr uint128_wrapper(uint64_t high, uint64_t low) FMT_NOEXCEPT : internal_{static_cast(low) | (static_cast(high) << 64)} {} constexpr uint128_wrapper(uint128_t u) : internal_{u} {} constexpr uint64_t high() const FMT_NOEXCEPT { return uint64_t(internal_ >> 64); } constexpr uint64_t low() const FMT_NOEXCEPT { return uint64_t(internal_); } uint128_wrapper& operator+=(uint64_t n) FMT_NOEXCEPT { internal_ += n; return *this; } #else uint64_t high_; uint64_t low_; constexpr uint128_wrapper(uint64_t high, uint64_t low) FMT_NOEXCEPT : high_{high}, low_{low} {} constexpr uint64_t high() const FMT_NOEXCEPT { return high_; } constexpr uint64_t 
low() const FMT_NOEXCEPT { return low_; } uint128_wrapper& operator+=(uint64_t n) FMT_NOEXCEPT { # if defined(_MSC_VER) && defined(_M_X64) unsigned char carry = _addcarry_u64(0, low_, n, &low_); _addcarry_u64(carry, high_, 0, &high_); return *this; # else uint64_t sum = low_ + n; high_ += (sum < low_ ? 1 : 0); low_ = sum; return *this; # endif } #endif }; // Implementation of Dragonbox algorithm: https://github.com/jk-jeon/dragonbox. namespace dragonbox { // Computes 128-bit result of multiplication of two 64-bit unsigned integers. inline uint128_wrapper umul128(uint64_t x, uint64_t y) FMT_NOEXCEPT { #if FMT_USE_INT128 return static_cast(x) * static_cast(y); #elif defined(_MSC_VER) && defined(_M_X64) uint128_wrapper result; result.low_ = _umul128(x, y, &result.high_); return result; #else const uint64_t mask = (uint64_t(1) << 32) - uint64_t(1); uint64_t a = x >> 32; uint64_t b = x & mask; uint64_t c = y >> 32; uint64_t d = y & mask; uint64_t ac = a * c; uint64_t bc = b * c; uint64_t ad = a * d; uint64_t bd = b * d; uint64_t intermediate = (bd >> 32) + (ad & mask) + (bc & mask); return {ac + (intermediate >> 32) + (ad >> 32) + (bc >> 32), (intermediate << 32) + (bd & mask)}; #endif } // Computes upper 64 bits of multiplication of two 64-bit unsigned integers. inline uint64_t umul128_upper64(uint64_t x, uint64_t y) FMT_NOEXCEPT { #if FMT_USE_INT128 auto p = static_cast(x) * static_cast(y); return static_cast(p >> 64); #elif defined(_MSC_VER) && defined(_M_X64) return __umulh(x, y); #else return umul128(x, y).high(); #endif } // Computes upper 64 bits of multiplication of a 64-bit unsigned integer and a // 128-bit unsigned integer. inline uint64_t umul192_upper64(uint64_t x, uint128_wrapper y) FMT_NOEXCEPT { uint128_wrapper g0 = umul128(x, y.high()); g0 += umul128_upper64(x, y.low()); return g0.high(); } // Computes upper 32 bits of multiplication of a 32-bit unsigned integer and a // 64-bit unsigned integer. 
inline uint32_t umul96_upper32(uint32_t x, uint64_t y) FMT_NOEXCEPT { return static_cast(umul128_upper64(x, y)); } // Computes middle 64 bits of multiplication of a 64-bit unsigned integer and a // 128-bit unsigned integer. inline uint64_t umul192_middle64(uint64_t x, uint128_wrapper y) FMT_NOEXCEPT { uint64_t g01 = x * y.high(); uint64_t g10 = umul128_upper64(x, y.low()); return g01 + g10; } // Computes lower 64 bits of multiplication of a 32-bit unsigned integer and a // 64-bit unsigned integer. inline uint64_t umul96_lower64(uint32_t x, uint64_t y) FMT_NOEXCEPT { return x * y; } // Computes floor(log10(pow(2, e))) for e in [-1700, 1700] using the method from // https://fmt.dev/papers/Grisu-Exact.pdf#page=5, section 3.4. inline int floor_log10_pow2(int e) FMT_NOEXCEPT { FMT_ASSERT(e <= 1700 && e >= -1700, "too large exponent"); const int shift = 22; return (e * static_cast(data::log10_2_significand >> (64 - shift))) >> shift; } // Various fast log computations. inline int floor_log2_pow10(int e) FMT_NOEXCEPT { FMT_ASSERT(e <= 1233 && e >= -1233, "too large exponent"); const uint64_t log2_10_integer_part = 3; const uint64_t log2_10_fractional_digits = 0x5269e12f346e2bf9; const int shift_amount = 19; return (e * static_cast( (log2_10_integer_part << shift_amount) | (log2_10_fractional_digits >> (64 - shift_amount)))) >> shift_amount; } inline int floor_log10_pow2_minus_log10_4_over_3(int e) FMT_NOEXCEPT { FMT_ASSERT(e <= 1700 && e >= -1700, "too large exponent"); const uint64_t log10_4_over_3_fractional_digits = 0x1ffbfc2bbc780375; const int shift_amount = 22; return (e * static_cast(data::log10_2_significand >> (64 - shift_amount)) - static_cast(log10_4_over_3_fractional_digits >> (64 - shift_amount))) >> shift_amount; } // Returns true iff x is divisible by pow(2, exp). 
inline bool divisible_by_power_of_2(uint32_t x, int exp) FMT_NOEXCEPT { FMT_ASSERT(exp >= 1, ""); FMT_ASSERT(x != 0, ""); #ifdef FMT_BUILTIN_CTZ return FMT_BUILTIN_CTZ(x) >= exp; #else return exp < num_bits() && x == ((x >> exp) << exp); #endif } inline bool divisible_by_power_of_2(uint64_t x, int exp) FMT_NOEXCEPT { FMT_ASSERT(exp >= 1, ""); FMT_ASSERT(x != 0, ""); #ifdef FMT_BUILTIN_CTZLL return FMT_BUILTIN_CTZLL(x) >= exp; #else return exp < num_bits() && x == ((x >> exp) << exp); #endif } // Table entry type for divisibility test. template struct divtest_table_entry { T mod_inv; T max_quotient; }; // Returns true iff x is divisible by pow(5, exp). inline bool divisible_by_power_of_5(uint32_t x, int exp) FMT_NOEXCEPT { FMT_ASSERT(exp <= 10, "too large exponent"); static constexpr const divtest_table_entry divtest_table[] = { {0x00000001, 0xffffffff}, {0xcccccccd, 0x33333333}, {0xc28f5c29, 0x0a3d70a3}, {0x26e978d5, 0x020c49ba}, {0x3afb7e91, 0x0068db8b}, {0x0bcbe61d, 0x0014f8b5}, {0x68c26139, 0x000431bd}, {0xae8d46a5, 0x0000d6bf}, {0x22e90e21, 0x00002af3}, {0x3a2e9c6d, 0x00000897}, {0x3ed61f49, 0x000001b7}}; return x * divtest_table[exp].mod_inv <= divtest_table[exp].max_quotient; } inline bool divisible_by_power_of_5(uint64_t x, int exp) FMT_NOEXCEPT { FMT_ASSERT(exp <= 23, "too large exponent"); static constexpr const divtest_table_entry divtest_table[] = { {0x0000000000000001, 0xffffffffffffffff}, {0xcccccccccccccccd, 0x3333333333333333}, {0x8f5c28f5c28f5c29, 0x0a3d70a3d70a3d70}, {0x1cac083126e978d5, 0x020c49ba5e353f7c}, {0xd288ce703afb7e91, 0x0068db8bac710cb2}, {0x5d4e8fb00bcbe61d, 0x0014f8b588e368f0}, {0x790fb65668c26139, 0x000431bde82d7b63}, {0xe5032477ae8d46a5, 0x0000d6bf94d5e57a}, {0xc767074b22e90e21, 0x00002af31dc46118}, {0x8e47ce423a2e9c6d, 0x0000089705f4136b}, {0x4fa7f60d3ed61f49, 0x000001b7cdfd9d7b}, {0x0fee64690c913975, 0x00000057f5ff85e5}, {0x3662e0e1cf503eb1, 0x000000119799812d}, {0xa47a2cf9f6433fbd, 0x0000000384b84d09}, {0x54186f653140a659, 
0x00000000b424dc35}, {0x7738164770402145, 0x0000000024075f3d}, {0xe4a4d1417cd9a041, 0x000000000734aca5}, {0xc75429d9e5c5200d, 0x000000000170ef54}, {0xc1773b91fac10669, 0x000000000049c977}, {0x26b172506559ce15, 0x00000000000ec1e4}, {0xd489e3a9addec2d1, 0x000000000002f394}, {0x90e860bb892c8d5d, 0x000000000000971d}, {0x502e79bf1b6f4f79, 0x0000000000001e39}, {0xdcd618596be30fe5, 0x000000000000060b}}; return x * divtest_table[exp].mod_inv <= divtest_table[exp].max_quotient; } // Replaces n by floor(n / pow(5, N)) returning true if and only if n is // divisible by pow(5, N). // Precondition: n <= 2 * pow(5, N + 1). template bool check_divisibility_and_divide_by_pow5(uint32_t& n) FMT_NOEXCEPT { static constexpr struct { uint32_t magic_number; int bits_for_comparison; uint32_t threshold; int shift_amount; } infos[] = {{0xcccd, 16, 0x3333, 18}, {0xa429, 8, 0x0a, 20}}; constexpr auto info = infos[N - 1]; n *= info.magic_number; const uint32_t comparison_mask = (1u << info.bits_for_comparison) - 1; bool result = (n & comparison_mask) <= info.threshold; n >>= info.shift_amount; return result; } // Computes floor(n / pow(10, N)) for small n and N. // Precondition: n <= pow(10, N + 1). 
template uint32_t small_division_by_pow10(uint32_t n) FMT_NOEXCEPT { static constexpr struct { uint32_t magic_number; int shift_amount; uint32_t divisor_times_10; } infos[] = {{0xcccd, 19, 100}, {0xa3d8, 22, 1000}}; constexpr auto info = infos[N - 1]; FMT_ASSERT(n <= info.divisor_times_10, "n is too large"); return n * info.magic_number >> info.shift_amount; } // Computes floor(n / 10^(kappa + 1)) (float) inline uint32_t divide_by_10_to_kappa_plus_1(uint32_t n) FMT_NOEXCEPT { return n / float_info::big_divisor; } // Computes floor(n / 10^(kappa + 1)) (double) inline uint64_t divide_by_10_to_kappa_plus_1(uint64_t n) FMT_NOEXCEPT { return umul128_upper64(n, 0x83126e978d4fdf3c) >> 9; } // Various subroutines using pow10 cache template struct cache_accessor; template <> struct cache_accessor { using carrier_uint = float_info::carrier_uint; using cache_entry_type = uint64_t; static uint64_t get_cached_power(int k) FMT_NOEXCEPT { FMT_ASSERT(k >= float_info::min_k && k <= float_info::max_k, "k is out of range"); constexpr const uint64_t pow10_significands[] = { 0x81ceb32c4b43fcf5, 0xa2425ff75e14fc32, 0xcad2f7f5359a3b3f, 0xfd87b5f28300ca0e, 0x9e74d1b791e07e49, 0xc612062576589ddb, 0xf79687aed3eec552, 0x9abe14cd44753b53, 0xc16d9a0095928a28, 0xf1c90080baf72cb2, 0x971da05074da7bef, 0xbce5086492111aeb, 0xec1e4a7db69561a6, 0x9392ee8e921d5d08, 0xb877aa3236a4b44a, 0xe69594bec44de15c, 0x901d7cf73ab0acda, 0xb424dc35095cd810, 0xe12e13424bb40e14, 0x8cbccc096f5088cc, 0xafebff0bcb24aaff, 0xdbe6fecebdedd5bf, 0x89705f4136b4a598, 0xabcc77118461cefd, 0xd6bf94d5e57a42bd, 0x8637bd05af6c69b6, 0xa7c5ac471b478424, 0xd1b71758e219652c, 0x83126e978d4fdf3c, 0xa3d70a3d70a3d70b, 0xcccccccccccccccd, 0x8000000000000000, 0xa000000000000000, 0xc800000000000000, 0xfa00000000000000, 0x9c40000000000000, 0xc350000000000000, 0xf424000000000000, 0x9896800000000000, 0xbebc200000000000, 0xee6b280000000000, 0x9502f90000000000, 0xba43b74000000000, 0xe8d4a51000000000, 0x9184e72a00000000, 0xb5e620f480000000, 
0xe35fa931a0000000, 0x8e1bc9bf04000000, 0xb1a2bc2ec5000000, 0xde0b6b3a76400000, 0x8ac7230489e80000, 0xad78ebc5ac620000, 0xd8d726b7177a8000, 0x878678326eac9000, 0xa968163f0a57b400, 0xd3c21bcecceda100, 0x84595161401484a0, 0xa56fa5b99019a5c8, 0xcecb8f27f4200f3a, 0x813f3978f8940984, 0xa18f07d736b90be5, 0xc9f2c9cd04674ede, 0xfc6f7c4045812296, 0x9dc5ada82b70b59d, 0xc5371912364ce305, 0xf684df56c3e01bc6, 0x9a130b963a6c115c, 0xc097ce7bc90715b3, 0xf0bdc21abb48db20, 0x96769950b50d88f4, 0xbc143fa4e250eb31, 0xeb194f8e1ae525fd, 0x92efd1b8d0cf37be, 0xb7abc627050305ad, 0xe596b7b0c643c719, 0x8f7e32ce7bea5c6f, 0xb35dbf821ae4f38b, 0xe0352f62a19e306e}; return pow10_significands[k - float_info::min_k]; } static carrier_uint compute_mul(carrier_uint u, const cache_entry_type& cache) FMT_NOEXCEPT { return umul96_upper32(u, cache); } static uint32_t compute_delta(const cache_entry_type& cache, int beta_minus_1) FMT_NOEXCEPT { return static_cast(cache >> (64 - 1 - beta_minus_1)); } static bool compute_mul_parity(carrier_uint two_f, const cache_entry_type& cache, int beta_minus_1) FMT_NOEXCEPT { FMT_ASSERT(beta_minus_1 >= 1, ""); FMT_ASSERT(beta_minus_1 < 64, ""); return ((umul96_lower64(two_f, cache) >> (64 - beta_minus_1)) & 1) != 0; } static carrier_uint compute_left_endpoint_for_shorter_interval_case( const cache_entry_type& cache, int beta_minus_1) FMT_NOEXCEPT { return static_cast( (cache - (cache >> (float_info::significand_bits + 2))) >> (64 - float_info::significand_bits - 1 - beta_minus_1)); } static carrier_uint compute_right_endpoint_for_shorter_interval_case( const cache_entry_type& cache, int beta_minus_1) FMT_NOEXCEPT { return static_cast( (cache + (cache >> (float_info::significand_bits + 1))) >> (64 - float_info::significand_bits - 1 - beta_minus_1)); } static carrier_uint compute_round_up_for_shorter_interval_case( const cache_entry_type& cache, int beta_minus_1) FMT_NOEXCEPT { return (static_cast( cache >> (64 - float_info::significand_bits - 2 - beta_minus_1)) + 1) / 2; 
} }; template <> struct cache_accessor { using carrier_uint = float_info::carrier_uint; using cache_entry_type = uint128_wrapper; static uint128_wrapper get_cached_power(int k) FMT_NOEXCEPT { FMT_ASSERT(k >= float_info::min_k && k <= float_info::max_k, "k is out of range"); static constexpr const uint128_wrapper pow10_significands[] = { #if FMT_USE_FULL_CACHE_DRAGONBOX {0xff77b1fcbebcdc4f, 0x25e8e89c13bb0f7b}, {0x9faacf3df73609b1, 0x77b191618c54e9ad}, {0xc795830d75038c1d, 0xd59df5b9ef6a2418}, {0xf97ae3d0d2446f25, 0x4b0573286b44ad1e}, {0x9becce62836ac577, 0x4ee367f9430aec33}, {0xc2e801fb244576d5, 0x229c41f793cda740}, {0xf3a20279ed56d48a, 0x6b43527578c11110}, {0x9845418c345644d6, 0x830a13896b78aaaa}, {0xbe5691ef416bd60c, 0x23cc986bc656d554}, {0xedec366b11c6cb8f, 0x2cbfbe86b7ec8aa9}, {0x94b3a202eb1c3f39, 0x7bf7d71432f3d6aa}, {0xb9e08a83a5e34f07, 0xdaf5ccd93fb0cc54}, {0xe858ad248f5c22c9, 0xd1b3400f8f9cff69}, {0x91376c36d99995be, 0x23100809b9c21fa2}, {0xb58547448ffffb2d, 0xabd40a0c2832a78b}, {0xe2e69915b3fff9f9, 0x16c90c8f323f516d}, {0x8dd01fad907ffc3b, 0xae3da7d97f6792e4}, {0xb1442798f49ffb4a, 0x99cd11cfdf41779d}, {0xdd95317f31c7fa1d, 0x40405643d711d584}, {0x8a7d3eef7f1cfc52, 0x482835ea666b2573}, {0xad1c8eab5ee43b66, 0xda3243650005eed0}, {0xd863b256369d4a40, 0x90bed43e40076a83}, {0x873e4f75e2224e68, 0x5a7744a6e804a292}, {0xa90de3535aaae202, 0x711515d0a205cb37}, {0xd3515c2831559a83, 0x0d5a5b44ca873e04}, {0x8412d9991ed58091, 0xe858790afe9486c3}, {0xa5178fff668ae0b6, 0x626e974dbe39a873}, {0xce5d73ff402d98e3, 0xfb0a3d212dc81290}, {0x80fa687f881c7f8e, 0x7ce66634bc9d0b9a}, {0xa139029f6a239f72, 0x1c1fffc1ebc44e81}, {0xc987434744ac874e, 0xa327ffb266b56221}, {0xfbe9141915d7a922, 0x4bf1ff9f0062baa9}, {0x9d71ac8fada6c9b5, 0x6f773fc3603db4aa}, {0xc4ce17b399107c22, 0xcb550fb4384d21d4}, {0xf6019da07f549b2b, 0x7e2a53a146606a49}, {0x99c102844f94e0fb, 0x2eda7444cbfc426e}, {0xc0314325637a1939, 0xfa911155fefb5309}, {0xf03d93eebc589f88, 0x793555ab7eba27cb}, {0x96267c7535b763b5, 
0x4bc1558b2f3458df}, {0xbbb01b9283253ca2, 0x9eb1aaedfb016f17}, {0xea9c227723ee8bcb, 0x465e15a979c1cadd}, {0x92a1958a7675175f, 0x0bfacd89ec191eca}, {0xb749faed14125d36, 0xcef980ec671f667c}, {0xe51c79a85916f484, 0x82b7e12780e7401b}, {0x8f31cc0937ae58d2, 0xd1b2ecb8b0908811}, {0xb2fe3f0b8599ef07, 0x861fa7e6dcb4aa16}, {0xdfbdcece67006ac9, 0x67a791e093e1d49b}, {0x8bd6a141006042bd, 0xe0c8bb2c5c6d24e1}, {0xaecc49914078536d, 0x58fae9f773886e19}, {0xda7f5bf590966848, 0xaf39a475506a899f}, {0x888f99797a5e012d, 0x6d8406c952429604}, {0xaab37fd7d8f58178, 0xc8e5087ba6d33b84}, {0xd5605fcdcf32e1d6, 0xfb1e4a9a90880a65}, {0x855c3be0a17fcd26, 0x5cf2eea09a550680}, {0xa6b34ad8c9dfc06f, 0xf42faa48c0ea481f}, {0xd0601d8efc57b08b, 0xf13b94daf124da27}, {0x823c12795db6ce57, 0x76c53d08d6b70859}, {0xa2cb1717b52481ed, 0x54768c4b0c64ca6f}, {0xcb7ddcdda26da268, 0xa9942f5dcf7dfd0a}, {0xfe5d54150b090b02, 0xd3f93b35435d7c4d}, {0x9efa548d26e5a6e1, 0xc47bc5014a1a6db0}, {0xc6b8e9b0709f109a, 0x359ab6419ca1091c}, {0xf867241c8cc6d4c0, 0xc30163d203c94b63}, {0x9b407691d7fc44f8, 0x79e0de63425dcf1e}, {0xc21094364dfb5636, 0x985915fc12f542e5}, {0xf294b943e17a2bc4, 0x3e6f5b7b17b2939e}, {0x979cf3ca6cec5b5a, 0xa705992ceecf9c43}, {0xbd8430bd08277231, 0x50c6ff782a838354}, {0xece53cec4a314ebd, 0xa4f8bf5635246429}, {0x940f4613ae5ed136, 0x871b7795e136be9a}, {0xb913179899f68584, 0x28e2557b59846e40}, {0xe757dd7ec07426e5, 0x331aeada2fe589d0}, {0x9096ea6f3848984f, 0x3ff0d2c85def7622}, {0xb4bca50b065abe63, 0x0fed077a756b53aa}, {0xe1ebce4dc7f16dfb, 0xd3e8495912c62895}, {0x8d3360f09cf6e4bd, 0x64712dd7abbbd95d}, {0xb080392cc4349dec, 0xbd8d794d96aacfb4}, {0xdca04777f541c567, 0xecf0d7a0fc5583a1}, {0x89e42caaf9491b60, 0xf41686c49db57245}, {0xac5d37d5b79b6239, 0x311c2875c522ced6}, {0xd77485cb25823ac7, 0x7d633293366b828c}, {0x86a8d39ef77164bc, 0xae5dff9c02033198}, {0xa8530886b54dbdeb, 0xd9f57f830283fdfd}, {0xd267caa862a12d66, 0xd072df63c324fd7c}, {0x8380dea93da4bc60, 0x4247cb9e59f71e6e}, {0xa46116538d0deb78, 0x52d9be85f074e609}, 
{0xcd795be870516656, 0x67902e276c921f8c}, {0x806bd9714632dff6, 0x00ba1cd8a3db53b7}, {0xa086cfcd97bf97f3, 0x80e8a40eccd228a5}, {0xc8a883c0fdaf7df0, 0x6122cd128006b2ce}, {0xfad2a4b13d1b5d6c, 0x796b805720085f82}, {0x9cc3a6eec6311a63, 0xcbe3303674053bb1}, {0xc3f490aa77bd60fc, 0xbedbfc4411068a9d}, {0xf4f1b4d515acb93b, 0xee92fb5515482d45}, {0x991711052d8bf3c5, 0x751bdd152d4d1c4b}, {0xbf5cd54678eef0b6, 0xd262d45a78a0635e}, {0xef340a98172aace4, 0x86fb897116c87c35}, {0x9580869f0e7aac0e, 0xd45d35e6ae3d4da1}, {0xbae0a846d2195712, 0x8974836059cca10a}, {0xe998d258869facd7, 0x2bd1a438703fc94c}, {0x91ff83775423cc06, 0x7b6306a34627ddd0}, {0xb67f6455292cbf08, 0x1a3bc84c17b1d543}, {0xe41f3d6a7377eeca, 0x20caba5f1d9e4a94}, {0x8e938662882af53e, 0x547eb47b7282ee9d}, {0xb23867fb2a35b28d, 0xe99e619a4f23aa44}, {0xdec681f9f4c31f31, 0x6405fa00e2ec94d5}, {0x8b3c113c38f9f37e, 0xde83bc408dd3dd05}, {0xae0b158b4738705e, 0x9624ab50b148d446}, {0xd98ddaee19068c76, 0x3badd624dd9b0958}, {0x87f8a8d4cfa417c9, 0xe54ca5d70a80e5d7}, {0xa9f6d30a038d1dbc, 0x5e9fcf4ccd211f4d}, {0xd47487cc8470652b, 0x7647c32000696720}, {0x84c8d4dfd2c63f3b, 0x29ecd9f40041e074}, {0xa5fb0a17c777cf09, 0xf468107100525891}, {0xcf79cc9db955c2cc, 0x7182148d4066eeb5}, {0x81ac1fe293d599bf, 0xc6f14cd848405531}, {0xa21727db38cb002f, 0xb8ada00e5a506a7d}, {0xca9cf1d206fdc03b, 0xa6d90811f0e4851d}, {0xfd442e4688bd304a, 0x908f4a166d1da664}, {0x9e4a9cec15763e2e, 0x9a598e4e043287ff}, {0xc5dd44271ad3cdba, 0x40eff1e1853f29fe}, {0xf7549530e188c128, 0xd12bee59e68ef47d}, {0x9a94dd3e8cf578b9, 0x82bb74f8301958cf}, {0xc13a148e3032d6e7, 0xe36a52363c1faf02}, {0xf18899b1bc3f8ca1, 0xdc44e6c3cb279ac2}, {0x96f5600f15a7b7e5, 0x29ab103a5ef8c0ba}, {0xbcb2b812db11a5de, 0x7415d448f6b6f0e8}, {0xebdf661791d60f56, 0x111b495b3464ad22}, {0x936b9fcebb25c995, 0xcab10dd900beec35}, {0xb84687c269ef3bfb, 0x3d5d514f40eea743}, {0xe65829b3046b0afa, 0x0cb4a5a3112a5113}, {0x8ff71a0fe2c2e6dc, 0x47f0e785eaba72ac}, {0xb3f4e093db73a093, 0x59ed216765690f57}, {0xe0f218b8d25088b8, 
0x306869c13ec3532d}, {0x8c974f7383725573, 0x1e414218c73a13fc}, {0xafbd2350644eeacf, 0xe5d1929ef90898fb}, {0xdbac6c247d62a583, 0xdf45f746b74abf3a}, {0x894bc396ce5da772, 0x6b8bba8c328eb784}, {0xab9eb47c81f5114f, 0x066ea92f3f326565}, {0xd686619ba27255a2, 0xc80a537b0efefebe}, {0x8613fd0145877585, 0xbd06742ce95f5f37}, {0xa798fc4196e952e7, 0x2c48113823b73705}, {0xd17f3b51fca3a7a0, 0xf75a15862ca504c6}, {0x82ef85133de648c4, 0x9a984d73dbe722fc}, {0xa3ab66580d5fdaf5, 0xc13e60d0d2e0ebbb}, {0xcc963fee10b7d1b3, 0x318df905079926a9}, {0xffbbcfe994e5c61f, 0xfdf17746497f7053}, {0x9fd561f1fd0f9bd3, 0xfeb6ea8bedefa634}, {0xc7caba6e7c5382c8, 0xfe64a52ee96b8fc1}, {0xf9bd690a1b68637b, 0x3dfdce7aa3c673b1}, {0x9c1661a651213e2d, 0x06bea10ca65c084f}, {0xc31bfa0fe5698db8, 0x486e494fcff30a63}, {0xf3e2f893dec3f126, 0x5a89dba3c3efccfb}, {0x986ddb5c6b3a76b7, 0xf89629465a75e01d}, {0xbe89523386091465, 0xf6bbb397f1135824}, {0xee2ba6c0678b597f, 0x746aa07ded582e2d}, {0x94db483840b717ef, 0xa8c2a44eb4571cdd}, {0xba121a4650e4ddeb, 0x92f34d62616ce414}, {0xe896a0d7e51e1566, 0x77b020baf9c81d18}, {0x915e2486ef32cd60, 0x0ace1474dc1d122f}, {0xb5b5ada8aaff80b8, 0x0d819992132456bb}, {0xe3231912d5bf60e6, 0x10e1fff697ed6c6a}, {0x8df5efabc5979c8f, 0xca8d3ffa1ef463c2}, {0xb1736b96b6fd83b3, 0xbd308ff8a6b17cb3}, {0xddd0467c64bce4a0, 0xac7cb3f6d05ddbdf}, {0x8aa22c0dbef60ee4, 0x6bcdf07a423aa96c}, {0xad4ab7112eb3929d, 0x86c16c98d2c953c7}, {0xd89d64d57a607744, 0xe871c7bf077ba8b8}, {0x87625f056c7c4a8b, 0x11471cd764ad4973}, {0xa93af6c6c79b5d2d, 0xd598e40d3dd89bd0}, {0xd389b47879823479, 0x4aff1d108d4ec2c4}, {0x843610cb4bf160cb, 0xcedf722a585139bb}, {0xa54394fe1eedb8fe, 0xc2974eb4ee658829}, {0xce947a3da6a9273e, 0x733d226229feea33}, {0x811ccc668829b887, 0x0806357d5a3f5260}, {0xa163ff802a3426a8, 0xca07c2dcb0cf26f8}, {0xc9bcff6034c13052, 0xfc89b393dd02f0b6}, {0xfc2c3f3841f17c67, 0xbbac2078d443ace3}, {0x9d9ba7832936edc0, 0xd54b944b84aa4c0e}, {0xc5029163f384a931, 0x0a9e795e65d4df12}, {0xf64335bcf065d37d, 0x4d4617b5ff4a16d6}, 
{0x99ea0196163fa42e, 0x504bced1bf8e4e46}, {0xc06481fb9bcf8d39, 0xe45ec2862f71e1d7}, {0xf07da27a82c37088, 0x5d767327bb4e5a4d}, {0x964e858c91ba2655, 0x3a6a07f8d510f870}, {0xbbe226efb628afea, 0x890489f70a55368c}, {0xeadab0aba3b2dbe5, 0x2b45ac74ccea842f}, {0x92c8ae6b464fc96f, 0x3b0b8bc90012929e}, {0xb77ada0617e3bbcb, 0x09ce6ebb40173745}, {0xe55990879ddcaabd, 0xcc420a6a101d0516}, {0x8f57fa54c2a9eab6, 0x9fa946824a12232e}, {0xb32df8e9f3546564, 0x47939822dc96abfa}, {0xdff9772470297ebd, 0x59787e2b93bc56f8}, {0x8bfbea76c619ef36, 0x57eb4edb3c55b65b}, {0xaefae51477a06b03, 0xede622920b6b23f2}, {0xdab99e59958885c4, 0xe95fab368e45ecee}, {0x88b402f7fd75539b, 0x11dbcb0218ebb415}, {0xaae103b5fcd2a881, 0xd652bdc29f26a11a}, {0xd59944a37c0752a2, 0x4be76d3346f04960}, {0x857fcae62d8493a5, 0x6f70a4400c562ddc}, {0xa6dfbd9fb8e5b88e, 0xcb4ccd500f6bb953}, {0xd097ad07a71f26b2, 0x7e2000a41346a7a8}, {0x825ecc24c873782f, 0x8ed400668c0c28c9}, {0xa2f67f2dfa90563b, 0x728900802f0f32fb}, {0xcbb41ef979346bca, 0x4f2b40a03ad2ffba}, {0xfea126b7d78186bc, 0xe2f610c84987bfa9}, {0x9f24b832e6b0f436, 0x0dd9ca7d2df4d7ca}, {0xc6ede63fa05d3143, 0x91503d1c79720dbc}, {0xf8a95fcf88747d94, 0x75a44c6397ce912b}, {0x9b69dbe1b548ce7c, 0xc986afbe3ee11abb}, {0xc24452da229b021b, 0xfbe85badce996169}, {0xf2d56790ab41c2a2, 0xfae27299423fb9c4}, {0x97c560ba6b0919a5, 0xdccd879fc967d41b}, {0xbdb6b8e905cb600f, 0x5400e987bbc1c921}, {0xed246723473e3813, 0x290123e9aab23b69}, {0x9436c0760c86e30b, 0xf9a0b6720aaf6522}, {0xb94470938fa89bce, 0xf808e40e8d5b3e6a}, {0xe7958cb87392c2c2, 0xb60b1d1230b20e05}, {0x90bd77f3483bb9b9, 0xb1c6f22b5e6f48c3}, {0xb4ecd5f01a4aa828, 0x1e38aeb6360b1af4}, {0xe2280b6c20dd5232, 0x25c6da63c38de1b1}, {0x8d590723948a535f, 0x579c487e5a38ad0f}, {0xb0af48ec79ace837, 0x2d835a9df0c6d852}, {0xdcdb1b2798182244, 0xf8e431456cf88e66}, {0x8a08f0f8bf0f156b, 0x1b8e9ecb641b5900}, {0xac8b2d36eed2dac5, 0xe272467e3d222f40}, {0xd7adf884aa879177, 0x5b0ed81dcc6abb10}, {0x86ccbb52ea94baea, 0x98e947129fc2b4ea}, {0xa87fea27a539e9a5, 
0x3f2398d747b36225}, {0xd29fe4b18e88640e, 0x8eec7f0d19a03aae}, {0x83a3eeeef9153e89, 0x1953cf68300424ad}, {0xa48ceaaab75a8e2b, 0x5fa8c3423c052dd8}, {0xcdb02555653131b6, 0x3792f412cb06794e}, {0x808e17555f3ebf11, 0xe2bbd88bbee40bd1}, {0xa0b19d2ab70e6ed6, 0x5b6aceaeae9d0ec5}, {0xc8de047564d20a8b, 0xf245825a5a445276}, {0xfb158592be068d2e, 0xeed6e2f0f0d56713}, {0x9ced737bb6c4183d, 0x55464dd69685606c}, {0xc428d05aa4751e4c, 0xaa97e14c3c26b887}, {0xf53304714d9265df, 0xd53dd99f4b3066a9}, {0x993fe2c6d07b7fab, 0xe546a8038efe402a}, {0xbf8fdb78849a5f96, 0xde98520472bdd034}, {0xef73d256a5c0f77c, 0x963e66858f6d4441}, {0x95a8637627989aad, 0xdde7001379a44aa9}, {0xbb127c53b17ec159, 0x5560c018580d5d53}, {0xe9d71b689dde71af, 0xaab8f01e6e10b4a7}, {0x9226712162ab070d, 0xcab3961304ca70e9}, {0xb6b00d69bb55c8d1, 0x3d607b97c5fd0d23}, {0xe45c10c42a2b3b05, 0x8cb89a7db77c506b}, {0x8eb98a7a9a5b04e3, 0x77f3608e92adb243}, {0xb267ed1940f1c61c, 0x55f038b237591ed4}, {0xdf01e85f912e37a3, 0x6b6c46dec52f6689}, {0x8b61313bbabce2c6, 0x2323ac4b3b3da016}, {0xae397d8aa96c1b77, 0xabec975e0a0d081b}, {0xd9c7dced53c72255, 0x96e7bd358c904a22}, {0x881cea14545c7575, 0x7e50d64177da2e55}, {0xaa242499697392d2, 0xdde50bd1d5d0b9ea}, {0xd4ad2dbfc3d07787, 0x955e4ec64b44e865}, {0x84ec3c97da624ab4, 0xbd5af13bef0b113f}, {0xa6274bbdd0fadd61, 0xecb1ad8aeacdd58f}, {0xcfb11ead453994ba, 0x67de18eda5814af3}, {0x81ceb32c4b43fcf4, 0x80eacf948770ced8}, {0xa2425ff75e14fc31, 0xa1258379a94d028e}, {0xcad2f7f5359a3b3e, 0x096ee45813a04331}, {0xfd87b5f28300ca0d, 0x8bca9d6e188853fd}, {0x9e74d1b791e07e48, 0x775ea264cf55347e}, {0xc612062576589dda, 0x95364afe032a819e}, {0xf79687aed3eec551, 0x3a83ddbd83f52205}, {0x9abe14cd44753b52, 0xc4926a9672793543}, {0xc16d9a0095928a27, 0x75b7053c0f178294}, {0xf1c90080baf72cb1, 0x5324c68b12dd6339}, {0x971da05074da7bee, 0xd3f6fc16ebca5e04}, {0xbce5086492111aea, 0x88f4bb1ca6bcf585}, {0xec1e4a7db69561a5, 0x2b31e9e3d06c32e6}, {0x9392ee8e921d5d07, 0x3aff322e62439fd0}, {0xb877aa3236a4b449, 0x09befeb9fad487c3}, 
{0xe69594bec44de15b, 0x4c2ebe687989a9b4}, {0x901d7cf73ab0acd9, 0x0f9d37014bf60a11}, {0xb424dc35095cd80f, 0x538484c19ef38c95}, {0xe12e13424bb40e13, 0x2865a5f206b06fba}, {0x8cbccc096f5088cb, 0xf93f87b7442e45d4}, {0xafebff0bcb24aafe, 0xf78f69a51539d749}, {0xdbe6fecebdedd5be, 0xb573440e5a884d1c}, {0x89705f4136b4a597, 0x31680a88f8953031}, {0xabcc77118461cefc, 0xfdc20d2b36ba7c3e}, {0xd6bf94d5e57a42bc, 0x3d32907604691b4d}, {0x8637bd05af6c69b5, 0xa63f9a49c2c1b110}, {0xa7c5ac471b478423, 0x0fcf80dc33721d54}, {0xd1b71758e219652b, 0xd3c36113404ea4a9}, {0x83126e978d4fdf3b, 0x645a1cac083126ea}, {0xa3d70a3d70a3d70a, 0x3d70a3d70a3d70a4}, {0xcccccccccccccccc, 0xcccccccccccccccd}, {0x8000000000000000, 0x0000000000000000}, {0xa000000000000000, 0x0000000000000000}, {0xc800000000000000, 0x0000000000000000}, {0xfa00000000000000, 0x0000000000000000}, {0x9c40000000000000, 0x0000000000000000}, {0xc350000000000000, 0x0000000000000000}, {0xf424000000000000, 0x0000000000000000}, {0x9896800000000000, 0x0000000000000000}, {0xbebc200000000000, 0x0000000000000000}, {0xee6b280000000000, 0x0000000000000000}, {0x9502f90000000000, 0x0000000000000000}, {0xba43b74000000000, 0x0000000000000000}, {0xe8d4a51000000000, 0x0000000000000000}, {0x9184e72a00000000, 0x0000000000000000}, {0xb5e620f480000000, 0x0000000000000000}, {0xe35fa931a0000000, 0x0000000000000000}, {0x8e1bc9bf04000000, 0x0000000000000000}, {0xb1a2bc2ec5000000, 0x0000000000000000}, {0xde0b6b3a76400000, 0x0000000000000000}, {0x8ac7230489e80000, 0x0000000000000000}, {0xad78ebc5ac620000, 0x0000000000000000}, {0xd8d726b7177a8000, 0x0000000000000000}, {0x878678326eac9000, 0x0000000000000000}, {0xa968163f0a57b400, 0x0000000000000000}, {0xd3c21bcecceda100, 0x0000000000000000}, {0x84595161401484a0, 0x0000000000000000}, {0xa56fa5b99019a5c8, 0x0000000000000000}, {0xcecb8f27f4200f3a, 0x0000000000000000}, {0x813f3978f8940984, 0x4000000000000000}, {0xa18f07d736b90be5, 0x5000000000000000}, {0xc9f2c9cd04674ede, 0xa400000000000000}, {0xfc6f7c4045812296, 
0x4d00000000000000}, {0x9dc5ada82b70b59d, 0xf020000000000000}, {0xc5371912364ce305, 0x6c28000000000000}, {0xf684df56c3e01bc6, 0xc732000000000000}, {0x9a130b963a6c115c, 0x3c7f400000000000}, {0xc097ce7bc90715b3, 0x4b9f100000000000}, {0xf0bdc21abb48db20, 0x1e86d40000000000}, {0x96769950b50d88f4, 0x1314448000000000}, {0xbc143fa4e250eb31, 0x17d955a000000000}, {0xeb194f8e1ae525fd, 0x5dcfab0800000000}, {0x92efd1b8d0cf37be, 0x5aa1cae500000000}, {0xb7abc627050305ad, 0xf14a3d9e40000000}, {0xe596b7b0c643c719, 0x6d9ccd05d0000000}, {0x8f7e32ce7bea5c6f, 0xe4820023a2000000}, {0xb35dbf821ae4f38b, 0xdda2802c8a800000}, {0xe0352f62a19e306e, 0xd50b2037ad200000}, {0x8c213d9da502de45, 0x4526f422cc340000}, {0xaf298d050e4395d6, 0x9670b12b7f410000}, {0xdaf3f04651d47b4c, 0x3c0cdd765f114000}, {0x88d8762bf324cd0f, 0xa5880a69fb6ac800}, {0xab0e93b6efee0053, 0x8eea0d047a457a00}, {0xd5d238a4abe98068, 0x72a4904598d6d880}, {0x85a36366eb71f041, 0x47a6da2b7f864750}, {0xa70c3c40a64e6c51, 0x999090b65f67d924}, {0xd0cf4b50cfe20765, 0xfff4b4e3f741cf6d}, {0x82818f1281ed449f, 0xbff8f10e7a8921a4}, {0xa321f2d7226895c7, 0xaff72d52192b6a0d}, {0xcbea6f8ceb02bb39, 0x9bf4f8a69f764490}, {0xfee50b7025c36a08, 0x02f236d04753d5b4}, {0x9f4f2726179a2245, 0x01d762422c946590}, {0xc722f0ef9d80aad6, 0x424d3ad2b7b97ef5}, {0xf8ebad2b84e0d58b, 0xd2e0898765a7deb2}, {0x9b934c3b330c8577, 0x63cc55f49f88eb2f}, {0xc2781f49ffcfa6d5, 0x3cbf6b71c76b25fb}, {0xf316271c7fc3908a, 0x8bef464e3945ef7a}, {0x97edd871cfda3a56, 0x97758bf0e3cbb5ac}, {0xbde94e8e43d0c8ec, 0x3d52eeed1cbea317}, {0xed63a231d4c4fb27, 0x4ca7aaa863ee4bdd}, {0x945e455f24fb1cf8, 0x8fe8caa93e74ef6a}, {0xb975d6b6ee39e436, 0xb3e2fd538e122b44}, {0xe7d34c64a9c85d44, 0x60dbbca87196b616}, {0x90e40fbeea1d3a4a, 0xbc8955e946fe31cd}, {0xb51d13aea4a488dd, 0x6babab6398bdbe41}, {0xe264589a4dcdab14, 0xc696963c7eed2dd1}, {0x8d7eb76070a08aec, 0xfc1e1de5cf543ca2}, {0xb0de65388cc8ada8, 0x3b25a55f43294bcb}, {0xdd15fe86affad912, 0x49ef0eb713f39ebe}, {0x8a2dbf142dfcc7ab, 0x6e3569326c784337}, 
{0xacb92ed9397bf996, 0x49c2c37f07965404}, {0xd7e77a8f87daf7fb, 0xdc33745ec97be906}, {0x86f0ac99b4e8dafd, 0x69a028bb3ded71a3}, {0xa8acd7c0222311bc, 0xc40832ea0d68ce0c}, {0xd2d80db02aabd62b, 0xf50a3fa490c30190}, {0x83c7088e1aab65db, 0x792667c6da79e0fa}, {0xa4b8cab1a1563f52, 0x577001b891185938}, {0xcde6fd5e09abcf26, 0xed4c0226b55e6f86}, {0x80b05e5ac60b6178, 0x544f8158315b05b4}, {0xa0dc75f1778e39d6, 0x696361ae3db1c721}, {0xc913936dd571c84c, 0x03bc3a19cd1e38e9}, {0xfb5878494ace3a5f, 0x04ab48a04065c723}, {0x9d174b2dcec0e47b, 0x62eb0d64283f9c76}, {0xc45d1df942711d9a, 0x3ba5d0bd324f8394}, {0xf5746577930d6500, 0xca8f44ec7ee36479}, {0x9968bf6abbe85f20, 0x7e998b13cf4e1ecb}, {0xbfc2ef456ae276e8, 0x9e3fedd8c321a67e}, {0xefb3ab16c59b14a2, 0xc5cfe94ef3ea101e}, {0x95d04aee3b80ece5, 0xbba1f1d158724a12}, {0xbb445da9ca61281f, 0x2a8a6e45ae8edc97}, {0xea1575143cf97226, 0xf52d09d71a3293bd}, {0x924d692ca61be758, 0x593c2626705f9c56}, {0xb6e0c377cfa2e12e, 0x6f8b2fb00c77836c}, {0xe498f455c38b997a, 0x0b6dfb9c0f956447}, {0x8edf98b59a373fec, 0x4724bd4189bd5eac}, {0xb2977ee300c50fe7, 0x58edec91ec2cb657}, {0xdf3d5e9bc0f653e1, 0x2f2967b66737e3ed}, {0x8b865b215899f46c, 0xbd79e0d20082ee74}, {0xae67f1e9aec07187, 0xecd8590680a3aa11}, {0xda01ee641a708de9, 0xe80e6f4820cc9495}, {0x884134fe908658b2, 0x3109058d147fdcdd}, {0xaa51823e34a7eede, 0xbd4b46f0599fd415}, {0xd4e5e2cdc1d1ea96, 0x6c9e18ac7007c91a}, {0x850fadc09923329e, 0x03e2cf6bc604ddb0}, {0xa6539930bf6bff45, 0x84db8346b786151c}, {0xcfe87f7cef46ff16, 0xe612641865679a63}, {0x81f14fae158c5f6e, 0x4fcb7e8f3f60c07e}, {0xa26da3999aef7749, 0xe3be5e330f38f09d}, {0xcb090c8001ab551c, 0x5cadf5bfd3072cc5}, {0xfdcb4fa002162a63, 0x73d9732fc7c8f7f6}, {0x9e9f11c4014dda7e, 0x2867e7fddcdd9afa}, {0xc646d63501a1511d, 0xb281e1fd541501b8}, {0xf7d88bc24209a565, 0x1f225a7ca91a4226}, {0x9ae757596946075f, 0x3375788de9b06958}, {0xc1a12d2fc3978937, 0x0052d6b1641c83ae}, {0xf209787bb47d6b84, 0xc0678c5dbd23a49a}, {0x9745eb4d50ce6332, 0xf840b7ba963646e0}, {0xbd176620a501fbff, 
0xb650e5a93bc3d898}, {0xec5d3fa8ce427aff, 0xa3e51f138ab4cebe}, {0x93ba47c980e98cdf, 0xc66f336c36b10137}, {0xb8a8d9bbe123f017, 0xb80b0047445d4184}, {0xe6d3102ad96cec1d, 0xa60dc059157491e5}, {0x9043ea1ac7e41392, 0x87c89837ad68db2f}, {0xb454e4a179dd1877, 0x29babe4598c311fb}, {0xe16a1dc9d8545e94, 0xf4296dd6fef3d67a}, {0x8ce2529e2734bb1d, 0x1899e4a65f58660c}, {0xb01ae745b101e9e4, 0x5ec05dcff72e7f8f}, {0xdc21a1171d42645d, 0x76707543f4fa1f73}, {0x899504ae72497eba, 0x6a06494a791c53a8}, {0xabfa45da0edbde69, 0x0487db9d17636892}, {0xd6f8d7509292d603, 0x45a9d2845d3c42b6}, {0x865b86925b9bc5c2, 0x0b8a2392ba45a9b2}, {0xa7f26836f282b732, 0x8e6cac7768d7141e}, {0xd1ef0244af2364ff, 0x3207d795430cd926}, {0x8335616aed761f1f, 0x7f44e6bd49e807b8}, {0xa402b9c5a8d3a6e7, 0x5f16206c9c6209a6}, {0xcd036837130890a1, 0x36dba887c37a8c0f}, {0x802221226be55a64, 0xc2494954da2c9789}, {0xa02aa96b06deb0fd, 0xf2db9baa10b7bd6c}, {0xc83553c5c8965d3d, 0x6f92829494e5acc7}, {0xfa42a8b73abbf48c, 0xcb772339ba1f17f9}, {0x9c69a97284b578d7, 0xff2a760414536efb}, {0xc38413cf25e2d70d, 0xfef5138519684aba}, {0xf46518c2ef5b8cd1, 0x7eb258665fc25d69}, {0x98bf2f79d5993802, 0xef2f773ffbd97a61}, {0xbeeefb584aff8603, 0xaafb550ffacfd8fa}, {0xeeaaba2e5dbf6784, 0x95ba2a53f983cf38}, {0x952ab45cfa97a0b2, 0xdd945a747bf26183}, {0xba756174393d88df, 0x94f971119aeef9e4}, {0xe912b9d1478ceb17, 0x7a37cd5601aab85d}, {0x91abb422ccb812ee, 0xac62e055c10ab33a}, {0xb616a12b7fe617aa, 0x577b986b314d6009}, {0xe39c49765fdf9d94, 0xed5a7e85fda0b80b}, {0x8e41ade9fbebc27d, 0x14588f13be847307}, {0xb1d219647ae6b31c, 0x596eb2d8ae258fc8}, {0xde469fbd99a05fe3, 0x6fca5f8ed9aef3bb}, {0x8aec23d680043bee, 0x25de7bb9480d5854}, {0xada72ccc20054ae9, 0xaf561aa79a10ae6a}, {0xd910f7ff28069da4, 0x1b2ba1518094da04}, {0x87aa9aff79042286, 0x90fb44d2f05d0842}, {0xa99541bf57452b28, 0x353a1607ac744a53}, {0xd3fa922f2d1675f2, 0x42889b8997915ce8}, {0x847c9b5d7c2e09b7, 0x69956135febada11}, {0xa59bc234db398c25, 0x43fab9837e699095}, {0xcf02b2c21207ef2e, 0x94f967e45e03f4bb}, 
{0x8161afb94b44f57d, 0x1d1be0eebac278f5}, {0xa1ba1ba79e1632dc, 0x6462d92a69731732}, {0xca28a291859bbf93, 0x7d7b8f7503cfdcfe}, {0xfcb2cb35e702af78, 0x5cda735244c3d43e}, {0x9defbf01b061adab, 0x3a0888136afa64a7}, {0xc56baec21c7a1916, 0x088aaa1845b8fdd0}, {0xf6c69a72a3989f5b, 0x8aad549e57273d45}, {0x9a3c2087a63f6399, 0x36ac54e2f678864b}, {0xc0cb28a98fcf3c7f, 0x84576a1bb416a7dd}, {0xf0fdf2d3f3c30b9f, 0x656d44a2a11c51d5}, {0x969eb7c47859e743, 0x9f644ae5a4b1b325}, {0xbc4665b596706114, 0x873d5d9f0dde1fee}, {0xeb57ff22fc0c7959, 0xa90cb506d155a7ea}, {0x9316ff75dd87cbd8, 0x09a7f12442d588f2}, {0xb7dcbf5354e9bece, 0x0c11ed6d538aeb2f}, {0xe5d3ef282a242e81, 0x8f1668c8a86da5fa}, {0x8fa475791a569d10, 0xf96e017d694487bc}, {0xb38d92d760ec4455, 0x37c981dcc395a9ac}, {0xe070f78d3927556a, 0x85bbe253f47b1417}, {0x8c469ab843b89562, 0x93956d7478ccec8e}, {0xaf58416654a6babb, 0x387ac8d1970027b2}, {0xdb2e51bfe9d0696a, 0x06997b05fcc0319e}, {0x88fcf317f22241e2, 0x441fece3bdf81f03}, {0xab3c2fddeeaad25a, 0xd527e81cad7626c3}, {0xd60b3bd56a5586f1, 0x8a71e223d8d3b074}, {0x85c7056562757456, 0xf6872d5667844e49}, {0xa738c6bebb12d16c, 0xb428f8ac016561db}, {0xd106f86e69d785c7, 0xe13336d701beba52}, {0x82a45b450226b39c, 0xecc0024661173473}, {0xa34d721642b06084, 0x27f002d7f95d0190}, {0xcc20ce9bd35c78a5, 0x31ec038df7b441f4}, {0xff290242c83396ce, 0x7e67047175a15271}, {0x9f79a169bd203e41, 0x0f0062c6e984d386}, {0xc75809c42c684dd1, 0x52c07b78a3e60868}, {0xf92e0c3537826145, 0xa7709a56ccdf8a82}, {0x9bbcc7a142b17ccb, 0x88a66076400bb691}, {0xc2abf989935ddbfe, 0x6acff893d00ea435}, {0xf356f7ebf83552fe, 0x0583f6b8c4124d43}, {0x98165af37b2153de, 0xc3727a337a8b704a}, {0xbe1bf1b059e9a8d6, 0x744f18c0592e4c5c}, {0xeda2ee1c7064130c, 0x1162def06f79df73}, {0x9485d4d1c63e8be7, 0x8addcb5645ac2ba8}, {0xb9a74a0637ce2ee1, 0x6d953e2bd7173692}, {0xe8111c87c5c1ba99, 0xc8fa8db6ccdd0437}, {0x910ab1d4db9914a0, 0x1d9c9892400a22a2}, {0xb54d5e4a127f59c8, 0x2503beb6d00cab4b}, {0xe2a0b5dc971f303a, 0x2e44ae64840fd61d}, {0x8da471a9de737e24, 
0x5ceaecfed289e5d2}, {0xb10d8e1456105dad, 0x7425a83e872c5f47}, {0xdd50f1996b947518, 0xd12f124e28f77719}, {0x8a5296ffe33cc92f, 0x82bd6b70d99aaa6f}, {0xace73cbfdc0bfb7b, 0x636cc64d1001550b}, {0xd8210befd30efa5a, 0x3c47f7e05401aa4e}, {0x8714a775e3e95c78, 0x65acfaec34810a71}, {0xa8d9d1535ce3b396, 0x7f1839a741a14d0d}, {0xd31045a8341ca07c, 0x1ede48111209a050}, {0x83ea2b892091e44d, 0x934aed0aab460432}, {0xa4e4b66b68b65d60, 0xf81da84d5617853f}, {0xce1de40642e3f4b9, 0x36251260ab9d668e}, {0x80d2ae83e9ce78f3, 0xc1d72b7c6b426019}, {0xa1075a24e4421730, 0xb24cf65b8612f81f}, {0xc94930ae1d529cfc, 0xdee033f26797b627}, {0xfb9b7cd9a4a7443c, 0x169840ef017da3b1}, {0x9d412e0806e88aa5, 0x8e1f289560ee864e}, {0xc491798a08a2ad4e, 0xf1a6f2bab92a27e2}, {0xf5b5d7ec8acb58a2, 0xae10af696774b1db}, {0x9991a6f3d6bf1765, 0xacca6da1e0a8ef29}, {0xbff610b0cc6edd3f, 0x17fd090a58d32af3}, {0xeff394dcff8a948e, 0xddfc4b4cef07f5b0}, {0x95f83d0a1fb69cd9, 0x4abdaf101564f98e}, {0xbb764c4ca7a4440f, 0x9d6d1ad41abe37f1}, {0xea53df5fd18d5513, 0x84c86189216dc5ed}, {0x92746b9be2f8552c, 0x32fd3cf5b4e49bb4}, {0xb7118682dbb66a77, 0x3fbc8c33221dc2a1}, {0xe4d5e82392a40515, 0x0fabaf3feaa5334a}, {0x8f05b1163ba6832d, 0x29cb4d87f2a7400e}, {0xb2c71d5bca9023f8, 0x743e20e9ef511012}, {0xdf78e4b2bd342cf6, 0x914da9246b255416}, {0x8bab8eefb6409c1a, 0x1ad089b6c2f7548e}, {0xae9672aba3d0c320, 0xa184ac2473b529b1}, {0xda3c0f568cc4f3e8, 0xc9e5d72d90a2741e}, {0x8865899617fb1871, 0x7e2fa67c7a658892}, {0xaa7eebfb9df9de8d, 0xddbb901b98feeab7}, {0xd51ea6fa85785631, 0x552a74227f3ea565}, {0x8533285c936b35de, 0xd53a88958f87275f}, {0xa67ff273b8460356, 0x8a892abaf368f137}, {0xd01fef10a657842c, 0x2d2b7569b0432d85}, {0x8213f56a67f6b29b, 0x9c3b29620e29fc73}, {0xa298f2c501f45f42, 0x8349f3ba91b47b8f}, {0xcb3f2f7642717713, 0x241c70a936219a73}, {0xfe0efb53d30dd4d7, 0xed238cd383aa0110}, {0x9ec95d1463e8a506, 0xf4363804324a40aa}, {0xc67bb4597ce2ce48, 0xb143c6053edcd0d5}, {0xf81aa16fdc1b81da, 0xdd94b7868e94050a}, {0x9b10a4e5e9913128, 0xca7cf2b4191c8326}, 
{0xc1d4ce1f63f57d72, 0xfd1c2f611f63a3f0}, {0xf24a01a73cf2dccf, 0xbc633b39673c8cec}, {0x976e41088617ca01, 0xd5be0503e085d813}, {0xbd49d14aa79dbc82, 0x4b2d8644d8a74e18}, {0xec9c459d51852ba2, 0xddf8e7d60ed1219e}, {0x93e1ab8252f33b45, 0xcabb90e5c942b503}, {0xb8da1662e7b00a17, 0x3d6a751f3b936243}, {0xe7109bfba19c0c9d, 0x0cc512670a783ad4}, {0x906a617d450187e2, 0x27fb2b80668b24c5}, {0xb484f9dc9641e9da, 0xb1f9f660802dedf6}, {0xe1a63853bbd26451, 0x5e7873f8a0396973}, {0x8d07e33455637eb2, 0xdb0b487b6423e1e8}, {0xb049dc016abc5e5f, 0x91ce1a9a3d2cda62}, {0xdc5c5301c56b75f7, 0x7641a140cc7810fb}, {0x89b9b3e11b6329ba, 0xa9e904c87fcb0a9d}, {0xac2820d9623bf429, 0x546345fa9fbdcd44}, {0xd732290fbacaf133, 0xa97c177947ad4095}, {0x867f59a9d4bed6c0, 0x49ed8eabcccc485d}, {0xa81f301449ee8c70, 0x5c68f256bfff5a74}, {0xd226fc195c6a2f8c, 0x73832eec6fff3111}, {0x83585d8fd9c25db7, 0xc831fd53c5ff7eab}, {0xa42e74f3d032f525, 0xba3e7ca8b77f5e55}, {0xcd3a1230c43fb26f, 0x28ce1bd2e55f35eb}, {0x80444b5e7aa7cf85, 0x7980d163cf5b81b3}, {0xa0555e361951c366, 0xd7e105bcc332621f}, {0xc86ab5c39fa63440, 0x8dd9472bf3fefaa7}, {0xfa856334878fc150, 0xb14f98f6f0feb951}, {0x9c935e00d4b9d8d2, 0x6ed1bf9a569f33d3}, {0xc3b8358109e84f07, 0x0a862f80ec4700c8}, {0xf4a642e14c6262c8, 0xcd27bb612758c0fa}, {0x98e7e9cccfbd7dbd, 0x8038d51cb897789c}, {0xbf21e44003acdd2c, 0xe0470a63e6bd56c3}, {0xeeea5d5004981478, 0x1858ccfce06cac74}, {0x95527a5202df0ccb, 0x0f37801e0c43ebc8}, {0xbaa718e68396cffd, 0xd30560258f54e6ba}, {0xe950df20247c83fd, 0x47c6b82ef32a2069}, {0x91d28b7416cdd27e, 0x4cdc331d57fa5441}, {0xb6472e511c81471d, 0xe0133fe4adf8e952}, {0xe3d8f9e563a198e5, 0x58180fddd97723a6}, {0x8e679c2f5e44ff8f, 0x570f09eaa7ea7648}, {0xb201833b35d63f73, 0x2cd2cc6551e513da}, {0xde81e40a034bcf4f, 0xf8077f7ea65e58d1}, {0x8b112e86420f6191, 0xfb04afaf27faf782}, {0xadd57a27d29339f6, 0x79c5db9af1f9b563}, {0xd94ad8b1c7380874, 0x18375281ae7822bc}, {0x87cec76f1c830548, 0x8f2293910d0b15b5}, {0xa9c2794ae3a3c69a, 0xb2eb3875504ddb22}, {0xd433179d9c8cb841, 
0x5fa60692a46151eb}, {0x849feec281d7f328, 0xdbc7c41ba6bcd333}, {0xa5c7ea73224deff3, 0x12b9b522906c0800}, {0xcf39e50feae16bef, 0xd768226b34870a00}, {0x81842f29f2cce375, 0xe6a1158300d46640}, {0xa1e53af46f801c53, 0x60495ae3c1097fd0}, {0xca5e89b18b602368, 0x385bb19cb14bdfc4}, {0xfcf62c1dee382c42, 0x46729e03dd9ed7b5}, {0x9e19db92b4e31ba9, 0x6c07a2c26a8346d1}, {0xc5a05277621be293, 0xc7098b7305241885}, { 0xf70867153aa2db38, 0xb8cbee4fc66d1ea7 } #else {0xff77b1fcbebcdc4f, 0x25e8e89c13bb0f7b}, {0xce5d73ff402d98e3, 0xfb0a3d212dc81290}, {0xa6b34ad8c9dfc06f, 0xf42faa48c0ea481f}, {0x86a8d39ef77164bc, 0xae5dff9c02033198}, {0xd98ddaee19068c76, 0x3badd624dd9b0958}, {0xafbd2350644eeacf, 0xe5d1929ef90898fb}, {0x8df5efabc5979c8f, 0xca8d3ffa1ef463c2}, {0xe55990879ddcaabd, 0xcc420a6a101d0516}, {0xb94470938fa89bce, 0xf808e40e8d5b3e6a}, {0x95a8637627989aad, 0xdde7001379a44aa9}, {0xf1c90080baf72cb1, 0x5324c68b12dd6339}, {0xc350000000000000, 0x0000000000000000}, {0x9dc5ada82b70b59d, 0xf020000000000000}, {0xfee50b7025c36a08, 0x02f236d04753d5b4}, {0xcde6fd5e09abcf26, 0xed4c0226b55e6f86}, {0xa6539930bf6bff45, 0x84db8346b786151c}, {0x865b86925b9bc5c2, 0x0b8a2392ba45a9b2}, {0xd910f7ff28069da4, 0x1b2ba1518094da04}, {0xaf58416654a6babb, 0x387ac8d1970027b2}, {0x8da471a9de737e24, 0x5ceaecfed289e5d2}, {0xe4d5e82392a40515, 0x0fabaf3feaa5334a}, {0xb8da1662e7b00a17, 0x3d6a751f3b936243}, { 0x95527a5202df0ccb, 0x0f37801e0c43ebc8 } #endif }; #if FMT_USE_FULL_CACHE_DRAGONBOX return pow10_significands[k - float_info::min_k]; #else static constexpr const uint64_t powers_of_5_64[] = { 0x0000000000000001, 0x0000000000000005, 0x0000000000000019, 0x000000000000007d, 0x0000000000000271, 0x0000000000000c35, 0x0000000000003d09, 0x000000000001312d, 0x000000000005f5e1, 0x00000000001dcd65, 0x00000000009502f9, 0x0000000002e90edd, 0x000000000e8d4a51, 0x0000000048c27395, 0x000000016bcc41e9, 0x000000071afd498d, 0x0000002386f26fc1, 0x000000b1a2bc2ec5, 0x000003782dace9d9, 0x00001158e460913d, 0x000056bc75e2d631, 
0x0001b1ae4d6e2ef5, 0x000878678326eac9, 0x002a5a058fc295ed, 0x00d3c21bcecceda1, 0x0422ca8b0a00a425, 0x14adf4b7320334b9}; static constexpr const uint32_t pow10_recovery_errors[] = { 0x50001400, 0x54044100, 0x54014555, 0x55954415, 0x54115555, 0x00000001, 0x50000000, 0x00104000, 0x54010004, 0x05004001, 0x55555544, 0x41545555, 0x54040551, 0x15445545, 0x51555514, 0x10000015, 0x00101100, 0x01100015, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x04450514, 0x45414110, 0x55555145, 0x50544050, 0x15040155, 0x11054140, 0x50111514, 0x11451454, 0x00400541, 0x00000000, 0x55555450, 0x10056551, 0x10054011, 0x55551014, 0x69514555, 0x05151109, 0x00155555}; static const int compression_ratio = 27; // Compute base index. int cache_index = (k - float_info::min_k) / compression_ratio; int kb = cache_index * compression_ratio + float_info::min_k; int offset = k - kb; // Get base cache. uint128_wrapper base_cache = pow10_significands[cache_index]; if (offset == 0) return base_cache; // Compute the required amount of bit-shift. int alpha = floor_log2_pow10(kb + offset) - floor_log2_pow10(kb) - offset; FMT_ASSERT(alpha > 0 && alpha < 64, "shifting error detected"); // Try to recover the real cache. uint64_t pow5 = powers_of_5_64[offset]; uint128_wrapper recovered_cache = umul128(base_cache.high(), pow5); uint128_wrapper middle_low = umul128(base_cache.low() - (kb < 0 ? 1u : 0u), pow5); recovered_cache += middle_low.high(); uint64_t high_to_middle = recovered_cache.high() << (64 - alpha); uint64_t middle_to_low = recovered_cache.low() << (64 - alpha); recovered_cache = uint128_wrapper{(recovered_cache.low() >> alpha) | high_to_middle, ((middle_low.low() >> alpha) | middle_to_low)}; if (kb < 0) recovered_cache += 1; // Get error. int error_idx = (k - float_info::min_k) / 16; uint32_t error = (pow10_recovery_errors[error_idx] >> ((k - float_info::min_k) % 16) * 2) & 0x3; // Add the error back. 
FMT_ASSERT(recovered_cache.low() + error >= recovered_cache.low(), ""); return {recovered_cache.high(), recovered_cache.low() + error}; #endif } static carrier_uint compute_mul(carrier_uint u, const cache_entry_type& cache) FMT_NOEXCEPT { return umul192_upper64(u, cache); } static uint32_t compute_delta(cache_entry_type const& cache, int beta_minus_1) FMT_NOEXCEPT { return static_cast(cache.high() >> (64 - 1 - beta_minus_1)); } static bool compute_mul_parity(carrier_uint two_f, const cache_entry_type& cache, int beta_minus_1) FMT_NOEXCEPT { FMT_ASSERT(beta_minus_1 >= 1, ""); FMT_ASSERT(beta_minus_1 < 64, ""); return ((umul192_middle64(two_f, cache) >> (64 - beta_minus_1)) & 1) != 0; } static carrier_uint compute_left_endpoint_for_shorter_interval_case( const cache_entry_type& cache, int beta_minus_1) FMT_NOEXCEPT { return (cache.high() - (cache.high() >> (float_info::significand_bits + 2))) >> (64 - float_info::significand_bits - 1 - beta_minus_1); } static carrier_uint compute_right_endpoint_for_shorter_interval_case( const cache_entry_type& cache, int beta_minus_1) FMT_NOEXCEPT { return (cache.high() + (cache.high() >> (float_info::significand_bits + 1))) >> (64 - float_info::significand_bits - 1 - beta_minus_1); } static carrier_uint compute_round_up_for_shorter_interval_case( const cache_entry_type& cache, int beta_minus_1) FMT_NOEXCEPT { return ((cache.high() >> (64 - float_info::significand_bits - 2 - beta_minus_1)) + 1) / 2; } }; // Various integer checks template bool is_left_endpoint_integer_shorter_interval(int exponent) FMT_NOEXCEPT { return exponent >= float_info< T>::case_shorter_interval_left_endpoint_lower_threshold && exponent <= float_info::case_shorter_interval_left_endpoint_upper_threshold; } template bool is_endpoint_integer(typename float_info::carrier_uint two_f, int exponent, int minus_k) FMT_NOEXCEPT { if (exponent < float_info::case_fc_pm_half_lower_threshold) return false; // For k >= 0. 
if (exponent <= float_info::case_fc_pm_half_upper_threshold) return true; // For k < 0. if (exponent > float_info::divisibility_check_by_5_threshold) return false; return divisible_by_power_of_5(two_f, minus_k); } template bool is_center_integer(typename float_info::carrier_uint two_f, int exponent, int minus_k) FMT_NOEXCEPT { // Exponent for 5 is negative. if (exponent > float_info::divisibility_check_by_5_threshold) return false; if (exponent > float_info::case_fc_upper_threshold) return divisible_by_power_of_5(two_f, minus_k); // Both exponents are nonnegative. if (exponent >= float_info::case_fc_lower_threshold) return true; // Exponent for 2 is negative. return divisible_by_power_of_2(two_f, minus_k - exponent + 1); } // Remove trailing zeros from n and return the number of zeros removed (float) FMT_INLINE int remove_trailing_zeros(uint32_t& n) FMT_NOEXCEPT { #ifdef FMT_BUILTIN_CTZ int t = FMT_BUILTIN_CTZ(n); #else int t = ctz(n); #endif if (t > float_info::max_trailing_zeros) t = float_info::max_trailing_zeros; const uint32_t mod_inv1 = 0xcccccccd; const uint32_t max_quotient1 = 0x33333333; const uint32_t mod_inv2 = 0xc28f5c29; const uint32_t max_quotient2 = 0x0a3d70a3; int s = 0; for (; s < t - 1; s += 2) { if (n * mod_inv2 > max_quotient2) break; n *= mod_inv2; } if (s < t && n * mod_inv1 <= max_quotient1) { n *= mod_inv1; ++s; } n >>= s; return s; } // Removes trailing zeros and returns the number of zeros removed (double) FMT_INLINE int remove_trailing_zeros(uint64_t& n) FMT_NOEXCEPT { #ifdef FMT_BUILTIN_CTZLL int t = FMT_BUILTIN_CTZLL(n); #else int t = ctzll(n); #endif if (t > float_info::max_trailing_zeros) t = float_info::max_trailing_zeros; // Divide by 10^8 and reduce to 32-bits // Since ret_value.significand <= (2^64 - 1) / 1000 < 10^17, // both of the quotient and the r should fit in 32-bits const uint32_t mod_inv1 = 0xcccccccd; const uint32_t max_quotient1 = 0x33333333; const uint64_t mod_inv8 = 0xc767074b22e90e21; const uint64_t max_quotient8 = 
0x00002af31dc46118; // If the number is divisible by 1'0000'0000, work with the quotient if (t >= 8) { auto quotient_candidate = n * mod_inv8; if (quotient_candidate <= max_quotient8) { auto quotient = static_cast(quotient_candidate >> 8); int s = 8; for (; s < t; ++s) { if (quotient * mod_inv1 > max_quotient1) break; quotient *= mod_inv1; } quotient >>= (s - 8); n = quotient; return s; } } // Otherwise, work with the remainder auto quotient = static_cast(n / 100000000); auto remainder = static_cast(n - 100000000 * quotient); if (t == 0 || remainder * mod_inv1 > max_quotient1) { return 0; } remainder *= mod_inv1; if (t == 1 || remainder * mod_inv1 > max_quotient1) { n = (remainder >> 1) + quotient * 10000000ull; return 1; } remainder *= mod_inv1; if (t == 2 || remainder * mod_inv1 > max_quotient1) { n = (remainder >> 2) + quotient * 1000000ull; return 2; } remainder *= mod_inv1; if (t == 3 || remainder * mod_inv1 > max_quotient1) { n = (remainder >> 3) + quotient * 100000ull; return 3; } remainder *= mod_inv1; if (t == 4 || remainder * mod_inv1 > max_quotient1) { n = (remainder >> 4) + quotient * 10000ull; return 4; } remainder *= mod_inv1; if (t == 5 || remainder * mod_inv1 > max_quotient1) { n = (remainder >> 5) + quotient * 1000ull; return 5; } remainder *= mod_inv1; if (t == 6 || remainder * mod_inv1 > max_quotient1) { n = (remainder >> 6) + quotient * 100ull; return 6; } remainder *= mod_inv1; n = (remainder >> 7) + quotient * 10ull; return 7; } // The main algorithm for shorter interval case template FMT_INLINE decimal_fp shorter_interval_case(int exponent) FMT_NOEXCEPT { decimal_fp ret_value; // Compute k and beta const int minus_k = floor_log10_pow2_minus_log10_4_over_3(exponent); const int beta_minus_1 = exponent + floor_log2_pow10(-minus_k); // Compute xi and zi using cache_entry_type = typename cache_accessor::cache_entry_type; const cache_entry_type cache = cache_accessor::get_cached_power(-minus_k); auto xi = 
cache_accessor::compute_left_endpoint_for_shorter_interval_case( cache, beta_minus_1); auto zi = cache_accessor::compute_right_endpoint_for_shorter_interval_case( cache, beta_minus_1); // If the left endpoint is not an integer, increase it if (!is_left_endpoint_integer_shorter_interval(exponent)) ++xi; // Try bigger divisor ret_value.significand = zi / 10; // If succeed, remove trailing zeros if necessary and return if (ret_value.significand * 10 >= xi) { ret_value.exponent = minus_k + 1; ret_value.exponent += remove_trailing_zeros(ret_value.significand); return ret_value; } // Otherwise, compute the round-up of y ret_value.significand = cache_accessor::compute_round_up_for_shorter_interval_case( cache, beta_minus_1); ret_value.exponent = minus_k; // When tie occurs, choose one of them according to the rule if (exponent >= float_info::shorter_interval_tie_lower_threshold && exponent <= float_info::shorter_interval_tie_upper_threshold) { ret_value.significand = ret_value.significand % 2 == 0 ? ret_value.significand : ret_value.significand - 1; } else if (ret_value.significand < xi) { ++ret_value.significand; } return ret_value; } template decimal_fp to_decimal(T x) FMT_NOEXCEPT { // Step 1: integer promotion & Schubfach multiplier calculation. using carrier_uint = typename float_info::carrier_uint; using cache_entry_type = typename cache_accessor::cache_entry_type; auto br = bit_cast(x); // Extract significand bits and exponent bits. const carrier_uint significand_mask = (static_cast(1) << float_info::significand_bits) - 1; carrier_uint significand = (br & significand_mask); int exponent = static_cast((br & exponent_mask()) >> float_info::significand_bits); if (exponent != 0) { // Check if normal. exponent += float_info::exponent_bias - float_info::significand_bits; // Shorter interval case; proceed like Schubfach. 
if (significand == 0) return shorter_interval_case(exponent); significand |= (static_cast(1) << float_info::significand_bits); } else { // Subnormal case; the interval is always regular. if (significand == 0) return {0, 0}; exponent = float_info::min_exponent - float_info::significand_bits; } const bool include_left_endpoint = (significand % 2 == 0); const bool include_right_endpoint = include_left_endpoint; // Compute k and beta. const int minus_k = floor_log10_pow2(exponent) - float_info::kappa; const cache_entry_type cache = cache_accessor::get_cached_power(-minus_k); const int beta_minus_1 = exponent + floor_log2_pow10(-minus_k); // Compute zi and deltai // 10^kappa <= deltai < 10^(kappa + 1) const uint32_t deltai = cache_accessor::compute_delta(cache, beta_minus_1); const carrier_uint two_fc = significand << 1; const carrier_uint two_fr = two_fc | 1; const carrier_uint zi = cache_accessor::compute_mul(two_fr << beta_minus_1, cache); // Step 2: Try larger divisor; remove trailing zeros if necessary // Using an upper bound on zi, we might be able to optimize the division // better than the compiler; we are computing zi / big_divisor here decimal_fp ret_value; ret_value.significand = divide_by_10_to_kappa_plus_1(zi); uint32_t r = static_cast(zi - float_info::big_divisor * ret_value.significand); if (r > deltai) { goto small_divisor_case_label; } else if (r < deltai) { // Exclude the right endpoint if necessary if (r == 0 && !include_right_endpoint && is_endpoint_integer(two_fr, exponent, minus_k)) { --ret_value.significand; r = float_info::big_divisor; goto small_divisor_case_label; } } else { // r == deltai; compare fractional parts // Check conditions in the order different from the paper // to take advantage of short-circuiting const carrier_uint two_fl = two_fc - 1; if ((!include_left_endpoint || !is_endpoint_integer(two_fl, exponent, minus_k)) && !cache_accessor::compute_mul_parity(two_fl, cache, beta_minus_1)) { goto small_divisor_case_label; } } 
ret_value.exponent = minus_k + float_info::kappa + 1; // We may need to remove trailing zeros ret_value.exponent += remove_trailing_zeros(ret_value.significand); return ret_value; // Step 3: Find the significand with the smaller divisor small_divisor_case_label: ret_value.significand *= 10; ret_value.exponent = minus_k + float_info::kappa; const uint32_t mask = (1u << float_info::kappa) - 1; auto dist = r - (deltai / 2) + (float_info::small_divisor / 2); // Is dist divisible by 2^kappa? if ((dist & mask) == 0) { const bool approx_y_parity = ((dist ^ (float_info::small_divisor / 2)) & 1) != 0; dist >>= float_info::kappa; // Is dist divisible by 5^kappa? if (check_divisibility_and_divide_by_pow5::kappa>(dist)) { ret_value.significand += dist; // Check z^(f) >= epsilon^(f) // We have either yi == zi - epsiloni or yi == (zi - epsiloni) - 1, // where yi == zi - epsiloni if and only if z^(f) >= epsilon^(f) // Since there are only 2 possibilities, we only need to care about the // parity. Also, zi and r should have the same parity since the divisor // is an even number if (cache_accessor::compute_mul_parity(two_fc, cache, beta_minus_1) != approx_y_parity) { --ret_value.significand; } else { // If z^(f) >= epsilon^(f), we might have a tie // when z^(f) == epsilon^(f), or equivalently, when y is an integer if (is_center_integer(two_fc, exponent, minus_k)) { ret_value.significand = ret_value.significand % 2 == 0 ? ret_value.significand : ret_value.significand - 1; } } } // Is dist not divisible by 5^kappa? else { ret_value.significand += dist; } } // Is dist not divisible by 2^kappa? 
else { // Since we know dist is small, we might be able to optimize the division // better than the compiler; we are computing dist / small_divisor here ret_value.significand += small_division_by_pow10::kappa>(dist); } return ret_value; } } // namespace dragonbox // Formats value using a variation of the Fixed-Precision Positive // Floating-Point Printout ((FPP)^2) algorithm by Steele & White: // https://fmt.dev/papers/p372-steele.pdf. template void fallback_format(Double d, int num_digits, bool binary32, buffer& buf, int& exp10) { bigint numerator; // 2 * R in (FPP)^2. bigint denominator; // 2 * S in (FPP)^2. // lower and upper are differences between value and corresponding boundaries. bigint lower; // (M^- in (FPP)^2). bigint upper_store; // upper's value if different from lower. bigint* upper = nullptr; // (M^+ in (FPP)^2). fp value; // Shift numerator and denominator by an extra bit or two (if lower boundary // is closer) to make lower and upper integers. This eliminates multiplication // by 2 during later computations. const bool is_predecessor_closer = binary32 ? value.assign(static_cast(d)) : value.assign(d); int shift = is_predecessor_closer ? 2 : 1; uint64_t significand = value.f << shift; if (value.e >= 0) { numerator.assign(significand); numerator <<= value.e; lower.assign(1); lower <<= value.e; if (shift != 1) { upper_store.assign(1); upper_store <<= value.e + 1; upper = &upper_store; } denominator.assign_pow10(exp10); denominator <<= shift; } else if (exp10 < 0) { numerator.assign_pow10(-exp10); lower.assign(numerator); if (shift != 1) { upper_store.assign(numerator); upper_store <<= 1; upper = &upper_store; } numerator *= significand; denominator.assign(1); denominator <<= shift - value.e; } else { numerator.assign(significand); denominator.assign_pow10(exp10); denominator <<= shift - value.e; lower.assign(1); if (shift != 1) { upper_store.assign(1ULL << 1); upper = &upper_store; } } // Invariant: value == (numerator / denominator) * pow(10, exp10). 
if (num_digits < 0) { // Generate the shortest representation. if (!upper) upper = &lower; bool even = (value.f & 1) == 0; num_digits = 0; char* data = buf.data(); for (;;) { int digit = numerator.divmod_assign(denominator); bool low = compare(numerator, lower) - even < 0; // numerator <[=] lower. // numerator + upper >[=] pow10: bool high = add_compare(numerator, *upper, denominator) + even > 0; data[num_digits++] = static_cast('0' + digit); if (low || high) { if (!low) { ++data[num_digits - 1]; } else if (high) { int result = add_compare(numerator, numerator, denominator); // Round half to even. if (result > 0 || (result == 0 && (digit % 2) != 0)) ++data[num_digits - 1]; } buf.try_resize(to_unsigned(num_digits)); exp10 -= num_digits - 1; return; } numerator *= 10; lower *= 10; if (upper != &lower) *upper *= 10; } } // Generate the given number of digits. exp10 -= num_digits - 1; if (num_digits == 0) { buf.try_resize(1); denominator *= 10; buf[0] = add_compare(numerator, numerator, denominator) > 0 ? '1' : '0'; return; } buf.try_resize(to_unsigned(num_digits)); for (int i = 0; i < num_digits - 1; ++i) { int digit = numerator.divmod_assign(denominator); buf[i] = static_cast('0' + digit); numerator *= 10; } int digit = numerator.divmod_assign(denominator); auto result = add_compare(numerator, numerator, denominator); if (result > 0 || (result == 0 && (digit % 2) != 0)) { if (digit == 9) { const auto overflow = '0' + 10; buf[num_digits - 1] = overflow; // Propagate the carry. 
for (int i = num_digits - 1; i > 0 && buf[i] == overflow; --i) { buf[i] = '0'; ++buf[i - 1]; } if (buf[0] == overflow) { buf[0] = '1'; ++exp10; } return; } ++digit; } buf[num_digits - 1] = static_cast('0' + digit); } template int format_float(T value, int precision, float_specs specs, buffer& buf) { static_assert(!std::is_same::value, ""); FMT_ASSERT(value >= 0, "value is negative"); const bool fixed = specs.format == float_format::fixed; if (value <= 0) { // <= instead of == to silence a warning. if (precision <= 0 || !fixed) { buf.push_back('0'); return 0; } buf.try_resize(to_unsigned(precision)); std::uninitialized_fill_n(buf.data(), precision, '0'); return -precision; } if (!specs.use_grisu) return snprintf_float(value, precision, specs, buf); if (precision < 0) { // Use Dragonbox for the shortest format. if (specs.binary32) { auto dec = dragonbox::to_decimal(static_cast(value)); write(buffer_appender(buf), dec.significand); return dec.exponent; } auto dec = dragonbox::to_decimal(static_cast(value)); write(buffer_appender(buf), dec.significand); return dec.exponent; } // Use Grisu + Dragon4 for the given precision: // https://www.cs.tufts.edu/~nr/cs257/archive/florian-loitsch/printf.pdf. int exp = 0; const int min_exp = -60; // alpha in Grisu. int cached_exp10 = 0; // K in Grisu. fp normalized = normalize(fp(value)); const auto cached_pow = get_cached_power( min_exp - (normalized.e + fp::significand_size), cached_exp10); normalized = normalized * cached_pow; // Limit precision to the maximum possible number of significant digits in an // IEEE754 double because we don't need to generate zeros. 
const int max_double_digits = 767; if (precision > max_double_digits) precision = max_double_digits; fixed_handler handler{buf.data(), 0, precision, -cached_exp10, fixed}; if (grisu_gen_digits(normalized, 1, exp, handler) == digits::error) { exp += handler.size - cached_exp10 - 1; fallback_format(value, handler.precision, specs.binary32, buf, exp); } else { exp += handler.exp10; buf.try_resize(to_unsigned(handler.size)); } if (!fixed && !specs.showpoint) { // Remove trailing zeros. auto num_digits = buf.size(); while (num_digits > 0 && buf[num_digits - 1] == '0') { --num_digits; ++exp; } buf.try_resize(num_digits); } return exp; } // namespace detail template int snprintf_float(T value, int precision, float_specs specs, buffer& buf) { // Buffer capacity must be non-zero, otherwise MSVC's vsnprintf_s will fail. FMT_ASSERT(buf.capacity() > buf.size(), "empty buffer"); static_assert(!std::is_same::value, ""); // Subtract 1 to account for the difference in precision since we use %e for // both general and exponent format. if (specs.format == float_format::general || specs.format == float_format::exp) precision = (precision >= 0 ? precision : 6) - 1; // Build the format string. enum { max_format_size = 7 }; // The longest format is "%#.*Le". char format[max_format_size]; char* format_ptr = format; *format_ptr++ = '%'; if (specs.showpoint && specs.format == float_format::hex) *format_ptr++ = '#'; if (precision >= 0) { *format_ptr++ = '.'; *format_ptr++ = '*'; } if (std::is_same()) *format_ptr++ = 'L'; *format_ptr++ = specs.format != float_format::hex ? (specs.format == float_format::fixed ? 'f' : 'e') : (specs.upper ? 'A' : 'a'); *format_ptr = '\0'; // Format using snprintf. auto offset = buf.size(); for (;;) { auto begin = buf.data() + offset; auto capacity = buf.capacity() - offset; #ifdef FMT_FUZZ if (precision > 100000) throw std::runtime_error( "fuzz mode - avoid large allocation inside snprintf"); #endif // Suppress the warning about a nonliteral format string. 
// Cannot use auto because of a bug in MinGW (#1532). int (*snprintf_ptr)(char*, size_t, const char*, ...) = FMT_SNPRINTF; int result = precision >= 0 ? snprintf_ptr(begin, capacity, format, precision, value) : snprintf_ptr(begin, capacity, format, value); if (result < 0) { // The buffer will grow exponentially. buf.try_reserve(buf.capacity() + 1); continue; } auto size = to_unsigned(result); // Size equal to capacity means that the last character was truncated. if (size >= capacity) { buf.try_reserve(size + offset + 1); // Add 1 for the terminating '\0'. continue; } auto is_digit = [](char c) { return c >= '0' && c <= '9'; }; if (specs.format == float_format::fixed) { if (precision == 0) { buf.try_resize(size); return 0; } // Find and remove the decimal point. auto end = begin + size, p = end; do { --p; } while (is_digit(*p)); int fraction_size = static_cast(end - p - 1); std::memmove(p, p + 1, to_unsigned(fraction_size)); buf.try_resize(size - 1); return -fraction_size; } if (specs.format == float_format::hex) { buf.try_resize(size + offset); return 0; } // Find and parse the exponent. auto end = begin + size, exp_pos = end; do { --exp_pos; } while (*exp_pos != 'e'); char sign = exp_pos[1]; FMT_ASSERT(sign == '+' || sign == '-', ""); int exp = 0; auto p = exp_pos + 2; // Skip 'e' and sign. do { FMT_ASSERT(is_digit(*p), ""); exp = exp * 10 + (*p++ - '0'); } while (p != end); if (sign == '-') exp = -exp; int fraction_size = 0; if (exp_pos != begin + 1) { // Remove trailing zeros. auto fraction_end = exp_pos - 1; while (*fraction_end == '0') --fraction_end; // Move the fractional part left to get rid of the decimal point. 
fraction_size = static_cast(fraction_end - begin - 1); std::memmove(begin + 1, begin + 2, to_unsigned(fraction_size)); } buf.try_resize(to_unsigned(fraction_size) + offset + 1); return exp - fraction_size; } } } // namespace detail template <> struct formatter { FMT_CONSTEXPR format_parse_context::iterator parse( format_parse_context& ctx) { return ctx.begin(); } format_context::iterator format(const detail::bigint& n, format_context& ctx) { auto out = ctx.out(); bool first = true; for (auto i = n.bigits_.size(); i > 0; --i) { auto value = n.bigits_[i - 1u]; if (first) { out = format_to(out, FMT_STRING("{:x}"), value); first = false; continue; } out = format_to(out, FMT_STRING("{:08x}"), value); } if (n.exp_ > 0) out = format_to(out, FMT_STRING("p{}"), n.exp_ * detail::bigint::bigit_bits); return out; } }; FMT_FUNC detail::utf8_to_utf16::utf8_to_utf16(string_view s) { for_each_codepoint(s, [this](uint32_t cp, int error) { if (error != 0) FMT_THROW(std::runtime_error("invalid utf8")); if (cp <= 0xFFFF) { buffer_.push_back(static_cast(cp)); } else { cp -= 0x10000; buffer_.push_back(static_cast(0xD800 + (cp >> 10))); buffer_.push_back(static_cast(0xDC00 + (cp & 0x3FF))); } }); buffer_.push_back(0); } FMT_FUNC void format_system_error(detail::buffer& out, int error_code, const char* message) FMT_NOEXCEPT { FMT_TRY { auto ec = std::error_code(error_code, std::generic_category()); write(std::back_inserter(out), std::system_error(ec, message).what()); return; } FMT_CATCH(...) {} format_error_code(out, error_code, message); } FMT_FUNC void detail::error_handler::on_error(const char* message) { FMT_THROW(format_error(message)); } FMT_FUNC void report_system_error(int error_code, const char* message) FMT_NOEXCEPT { report_error(format_system_error, error_code, message); } FMT_FUNC std::string vformat(string_view fmt, format_args args) { // Don't optimize the "{}" case to keep the binary size small and because it // can be better optimized in fmt::format anyway. 
auto buffer = memory_buffer(); detail::vformat_to(buffer, fmt, args); return to_string(buffer); } #ifdef _WIN32 namespace detail { using dword = conditional_t; extern "C" __declspec(dllimport) int __stdcall WriteConsoleW( // void*, const void*, dword, dword*, void*); } // namespace detail #endif namespace detail { FMT_FUNC void print(std::FILE* f, string_view text) { #ifdef _WIN32 auto fd = _fileno(f); if (_isatty(fd)) { detail::utf8_to_utf16 u16(string_view(text.data(), text.size())); auto written = detail::dword(); if (detail::WriteConsoleW(reinterpret_cast(_get_osfhandle(fd)), u16.c_str(), static_cast(u16.size()), &written, nullptr)) { return; } // Fallback to fwrite on failure. It can happen if the output has been // redirected to NUL. } #endif detail::fwrite_fully(text.data(), 1, text.size(), f); } } // namespace detail FMT_FUNC void vprint(std::FILE* f, string_view format_str, format_args args) { memory_buffer buffer; detail::vformat_to(buffer, format_str, args); detail::print(f, {buffer.data(), buffer.size()}); } #ifdef _WIN32 // Print assuming legacy (non-Unicode) encoding. 
FMT_FUNC void detail::vprint_mojibake(std::FILE* f, string_view format_str, format_args args) { memory_buffer buffer; detail::vformat_to(buffer, format_str, basic_format_args>(args)); fwrite_fully(buffer.data(), 1, buffer.size(), f); } #endif FMT_FUNC void vprint(string_view format_str, format_args args) { vprint(stdout, format_str, args); } FMT_END_NAMESPACE #endif // FMT_FORMAT_INL_H_ cpp11/inst/include/fmt/format.h0000644000175000017500000031222114077027144016166 0ustar nileshnilesh/* Formatting library for C++ Copyright (c) 2012 - present, Victor Zverovich Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. --- Optional exception to the license --- As an exception, if, as a result of your compiling your source code, portions of this Software are embedded into a machine-executable object form of such source code, you may redistribute such embedded portions in such object form without including the above copyright and permission notices. 
*/ #ifndef FMT_FORMAT_H_ #define FMT_FORMAT_H_ #include // std::signbit #include // uint32_t #include // std::numeric_limits #include // std::uninitialized_copy #include // std::runtime_error #include // std::system_error #include // std::swap #include "core.h" #ifdef __INTEL_COMPILER # define FMT_ICC_VERSION __INTEL_COMPILER #elif defined(__ICL) # define FMT_ICC_VERSION __ICL #else # define FMT_ICC_VERSION 0 #endif #ifdef __NVCC__ # define FMT_CUDA_VERSION (__CUDACC_VER_MAJOR__ * 100 + __CUDACC_VER_MINOR__) #else # define FMT_CUDA_VERSION 0 #endif #ifdef __has_builtin # define FMT_HAS_BUILTIN(x) __has_builtin(x) #else # define FMT_HAS_BUILTIN(x) 0 #endif #if FMT_GCC_VERSION || FMT_CLANG_VERSION # define FMT_NOINLINE __attribute__((noinline)) #else # define FMT_NOINLINE #endif #if FMT_MSC_VER # define FMT_MSC_DEFAULT = default #else # define FMT_MSC_DEFAULT #endif #ifndef FMT_THROW # if FMT_EXCEPTIONS # if FMT_MSC_VER || FMT_NVCC FMT_BEGIN_NAMESPACE namespace detail { template inline void do_throw(const Exception& x) { // Silence unreachable code warnings in MSVC and NVCC because these // are nearly impossible to fix in a generic code. 
volatile bool b = true; if (b) throw x; } } // namespace detail FMT_END_NAMESPACE # define FMT_THROW(x) detail::do_throw(x) # else # define FMT_THROW(x) throw x # endif # else # define FMT_THROW(x) \ do { \ FMT_ASSERT(false, (x).what()); \ } while (false) # endif #endif #if FMT_EXCEPTIONS # define FMT_TRY try # define FMT_CATCH(x) catch (x) #else # define FMT_TRY if (true) # define FMT_CATCH(x) if (false) #endif #ifndef FMT_DEPRECATED # if FMT_HAS_CPP14_ATTRIBUTE(deprecated) || FMT_MSC_VER >= 1900 # define FMT_DEPRECATED [[deprecated]] # else # if (defined(__GNUC__) && !defined(__LCC__)) || defined(__clang__) # define FMT_DEPRECATED __attribute__((deprecated)) # elif FMT_MSC_VER # define FMT_DEPRECATED __declspec(deprecated) # else # define FMT_DEPRECATED /* deprecated */ # endif # endif #endif // Workaround broken [[deprecated]] in the Intel, PGI and NVCC compilers. #if FMT_ICC_VERSION || defined(__PGI) || FMT_NVCC # define FMT_DEPRECATED_ALIAS #else # define FMT_DEPRECATED_ALIAS FMT_DEPRECATED #endif #ifndef FMT_USE_USER_DEFINED_LITERALS // EDG based compilers (Intel, NVIDIA, Elbrus, etc), GCC and MSVC support UDLs. # if (FMT_HAS_FEATURE(cxx_user_literals) || FMT_GCC_VERSION >= 407 || \ FMT_MSC_VER >= 1900) && \ (!defined(__EDG_VERSION__) || __EDG_VERSION__ >= /* UDL feature */ 480) # define FMT_USE_USER_DEFINED_LITERALS 1 # else # define FMT_USE_USER_DEFINED_LITERALS 0 # endif #endif // Defining FMT_REDUCE_INT_INSTANTIATIONS to 1, will reduce the number of // integer formatter template instantiations to just one by only using the // largest integer type. This results in a reduction in binary size but will // cause a decrease in integer formatting performance. 
#if !defined(FMT_REDUCE_INT_INSTANTIATIONS) # define FMT_REDUCE_INT_INSTANTIATIONS 0 #endif // __builtin_clz is broken in clang with Microsoft CodeGen: // https://github.com/fmtlib/fmt/issues/519 #if (FMT_GCC_VERSION || FMT_HAS_BUILTIN(__builtin_clz)) && !FMT_MSC_VER # define FMT_BUILTIN_CLZ(n) __builtin_clz(n) #endif #if (FMT_GCC_VERSION || FMT_HAS_BUILTIN(__builtin_clzll)) && !FMT_MSC_VER # define FMT_BUILTIN_CLZLL(n) __builtin_clzll(n) #endif #if (FMT_GCC_VERSION || FMT_HAS_BUILTIN(__builtin_ctz)) # define FMT_BUILTIN_CTZ(n) __builtin_ctz(n) #endif #if (FMT_GCC_VERSION || FMT_HAS_BUILTIN(__builtin_ctzll)) # define FMT_BUILTIN_CTZLL(n) __builtin_ctzll(n) #endif #if FMT_MSC_VER # include // _BitScanReverse[64], _BitScanForward[64], _umul128 #endif // Some compilers masquerade as both MSVC and GCC-likes or otherwise support // __builtin_clz and __builtin_clzll, so only define FMT_BUILTIN_CLZ using the // MSVC intrinsics if the clz and clzll builtins are not available. #if FMT_MSC_VER && !defined(FMT_BUILTIN_CLZLL) && !defined(FMT_BUILTIN_CTZLL) FMT_BEGIN_NAMESPACE namespace detail { // Avoid Clang with Microsoft CodeGen's -Wunknown-pragmas warning. # if !defined(__clang__) # pragma managed(push, off) # pragma intrinsic(_BitScanForward) # pragma intrinsic(_BitScanReverse) # if defined(_WIN64) # pragma intrinsic(_BitScanForward64) # pragma intrinsic(_BitScanReverse64) # endif # endif inline auto clz(uint32_t x) -> int { unsigned long r = 0; _BitScanReverse(&r, x); FMT_ASSERT(x != 0, ""); // Static analysis complains about using uninitialized data // "r", but the only way that can happen is if "x" is 0, // which the callers guarantee to not happen. FMT_MSC_WARNING(suppress : 6102) return 31 ^ static_cast(r); } # define FMT_BUILTIN_CLZ(n) detail::clz(n) inline auto clzll(uint64_t x) -> int { unsigned long r = 0; # ifdef _WIN64 _BitScanReverse64(&r, x); # else // Scan the high 32 bits. 
if (_BitScanReverse(&r, static_cast(x >> 32))) return 63 ^ (r + 32); // Scan the low 32 bits. _BitScanReverse(&r, static_cast(x)); # endif FMT_ASSERT(x != 0, ""); FMT_MSC_WARNING(suppress : 6102) // Suppress a bogus static analysis warning. return 63 ^ static_cast(r); } # define FMT_BUILTIN_CLZLL(n) detail::clzll(n) inline auto ctz(uint32_t x) -> int { unsigned long r = 0; _BitScanForward(&r, x); FMT_ASSERT(x != 0, ""); FMT_MSC_WARNING(suppress : 6102) // Suppress a bogus static analysis warning. return static_cast(r); } # define FMT_BUILTIN_CTZ(n) detail::ctz(n) inline auto ctzll(uint64_t x) -> int { unsigned long r = 0; FMT_ASSERT(x != 0, ""); FMT_MSC_WARNING(suppress : 6102) // Suppress a bogus static analysis warning. # ifdef _WIN64 _BitScanForward64(&r, x); # else // Scan the low 32 bits. if (_BitScanForward(&r, static_cast(x))) return static_cast(r); // Scan the high 32 bits. _BitScanForward(&r, static_cast(x >> 32)); r += 32; # endif return static_cast(r); } # define FMT_BUILTIN_CTZLL(n) detail::ctzll(n) # if !defined(__clang__) # pragma managed(pop) # endif } // namespace detail FMT_END_NAMESPACE #endif FMT_BEGIN_NAMESPACE namespace detail { #if __cplusplus >= 202002L || \ (__cplusplus >= 201709L && FMT_GCC_VERSION >= 1002) # define FMT_CONSTEXPR20 constexpr #else # define FMT_CONSTEXPR20 #endif // An equivalent of `*reinterpret_cast(&source)` that doesn't have // undefined behavior (e.g. due to type aliasing). // Example: uint64_t d = bit_cast(2.718); template inline auto bit_cast(const Source& source) -> Dest { static_assert(sizeof(Dest) == sizeof(Source), "size mismatch"); Dest dest; std::memcpy(&dest, &source, sizeof(dest)); return dest; } inline auto is_big_endian() -> bool { const auto u = 1u; struct bytes { char data[sizeof(u)]; }; return bit_cast(u).data[0] == 0; } // A fallback implementation of uintptr_t for systems that lack it. 
struct fallback_uintptr { unsigned char value[sizeof(void*)]; fallback_uintptr() = default; explicit fallback_uintptr(const void* p) { *this = bit_cast(p); if (is_big_endian()) { for (size_t i = 0, j = sizeof(void*) - 1; i < j; ++i, --j) std::swap(value[i], value[j]); } } }; #ifdef UINTPTR_MAX using uintptr_t = ::uintptr_t; inline auto to_uintptr(const void* p) -> uintptr_t { return bit_cast(p); } #else using uintptr_t = fallback_uintptr; inline auto to_uintptr(const void* p) -> fallback_uintptr { return fallback_uintptr(p); } #endif // Returns the largest possible value for type T. Same as // std::numeric_limits::max() but shorter and not affected by the max macro. template constexpr auto max_value() -> T { return (std::numeric_limits::max)(); } template constexpr auto num_bits() -> int { return std::numeric_limits::digits; } // std::numeric_limits::digits may return 0 for 128-bit ints. template <> constexpr auto num_bits() -> int { return 128; } template <> constexpr auto num_bits() -> int { return 128; } template <> constexpr auto num_bits() -> int { return static_cast(sizeof(void*) * std::numeric_limits::digits); } FMT_INLINE void assume(bool condition) { (void)condition; #if FMT_HAS_BUILTIN(__builtin_assume) __builtin_assume(condition); #endif } // An approximation of iterator_t for pre-C++20 systems. template using iterator_t = decltype(std::begin(std::declval())); template using sentinel_t = decltype(std::end(std::declval())); // A workaround for std::string not having mutable data() until C++17. template inline auto get_data(std::basic_string& s) -> Char* { return &s[0]; } template inline auto get_data(Container& c) -> typename Container::value_type* { return c.data(); } #if defined(_SECURE_SCL) && _SECURE_SCL // Make a checked iterator to avoid MSVC warnings. 
template using checked_ptr = stdext::checked_array_iterator; template auto make_checked(T* p, size_t size) -> checked_ptr { return {p, size}; } #else template using checked_ptr = T*; template inline auto make_checked(T* p, size_t) -> T* { return p; } #endif // Attempts to reserve space for n extra characters in the output range. // Returns a pointer to the reserved range or a reference to it. template ::value)> #if FMT_CLANG_VERSION >= 307 && !FMT_ICC_VERSION __attribute__((no_sanitize("undefined"))) #endif inline auto reserve(std::back_insert_iterator it, size_t n) -> checked_ptr { Container& c = get_container(it); size_t size = c.size(); c.resize(size + n); return make_checked(get_data(c) + size, n); } template inline auto reserve(buffer_appender it, size_t n) -> buffer_appender { buffer& buf = get_container(it); buf.try_reserve(buf.size() + n); return it; } template constexpr auto reserve(Iterator& it, size_t) -> Iterator& { return it; } template using reserve_iterator = remove_reference_t(), 0))>; template constexpr auto to_pointer(OutputIt, size_t) -> T* { return nullptr; } template auto to_pointer(buffer_appender it, size_t n) -> T* { buffer& buf = get_container(it); auto size = buf.size(); if (buf.capacity() < size + n) return nullptr; buf.try_resize(size + n); return buf.data() + size; } template ::value)> inline auto base_iterator(std::back_insert_iterator& it, checked_ptr) -> std::back_insert_iterator { return it; } template constexpr auto base_iterator(Iterator, Iterator it) -> Iterator { return it; } // is spectacularly slow to compile in C++20 so use a simple fill_n // instead (#1998). 
template FMT_CONSTEXPR auto fill_n(OutputIt out, Size count, const T& value) -> OutputIt { for (Size i = 0; i < count; ++i) *out++ = value; return out; } template FMT_CONSTEXPR20 auto fill_n(T* out, Size count, char value) -> T* { if (is_constant_evaluated()) { return fill_n(out, count, value); } std::memset(out, value, to_unsigned(count)); return out + count; } #ifdef __cpp_char8_t using char8_type = char8_t; #else enum char8_type : unsigned char {}; #endif template FMT_CONSTEXPR FMT_NOINLINE auto copy_str_noinline(InputIt begin, InputIt end, OutputIt out) -> OutputIt { return copy_str(begin, end, out); } // A public domain branchless UTF-8 decoder by Christopher Wellons: // https://github.com/skeeto/branchless-utf8 /* Decode the next character, c, from s, reporting errors in e. * * Since this is a branchless decoder, four bytes will be read from the * buffer regardless of the actual length of the next character. This * means the buffer _must_ have at least three bytes of zero padding * following the end of the data stream. * * Errors are reported in e, which will be non-zero if the parsed * character was somehow invalid: invalid byte sequence, non-canonical * encoding, or a surrogate half. * * The function returns a pointer to the next character. When an error * occurs, this pointer will be a guess that depends on the particular * error, but it will always advance at least one byte. */ FMT_CONSTEXPR inline auto utf8_decode(const char* s, uint32_t* c, int* e) -> const char* { constexpr const int masks[] = {0x00, 0x7f, 0x1f, 0x0f, 0x07}; constexpr const uint32_t mins[] = {4194304, 0, 128, 2048, 65536}; constexpr const int shiftc[] = {0, 18, 12, 6, 0}; constexpr const int shifte[] = {0, 6, 4, 2, 0}; int len = code_point_length(s); const char* next = s + len; // Assume a four-byte character and load four bytes. Unused bits are // shifted out. 
*c = uint32_t(s[0] & masks[len]) << 18; *c |= uint32_t(s[1] & 0x3f) << 12; *c |= uint32_t(s[2] & 0x3f) << 6; *c |= uint32_t(s[3] & 0x3f) << 0; *c >>= shiftc[len]; // Accumulate the various error conditions. using uchar = unsigned char; *e = (*c < mins[len]) << 6; // non-canonical encoding *e |= ((*c >> 11) == 0x1b) << 7; // surrogate half? *e |= (*c > 0x10FFFF) << 8; // out of range? *e |= (uchar(s[1]) & 0xc0) >> 2; *e |= (uchar(s[2]) & 0xc0) >> 4; *e |= uchar(s[3]) >> 6; *e ^= 0x2a; // top two bits of each tail byte correct? *e >>= shifte[len]; return next; } template FMT_CONSTEXPR void for_each_codepoint(string_view s, F f) { auto decode = [f](const char* p) { auto cp = uint32_t(); auto error = 0; p = utf8_decode(p, &cp, &error); f(cp, error); return p; }; auto p = s.data(); const size_t block_size = 4; // utf8_decode always reads blocks of 4 chars. if (s.size() >= block_size) { for (auto end = p + s.size() - block_size + 1; p < end;) p = decode(p); } if (auto num_chars_left = s.data() + s.size() - p) { char buf[2 * block_size - 1] = {}; copy_str(p, p + num_chars_left, buf); p = buf; do { p = decode(p); } while (p - buf < num_chars_left); } } template inline auto compute_width(basic_string_view s) -> size_t { return s.size(); } // Computes approximate display width of a UTF-8 string. FMT_CONSTEXPR inline size_t compute_width(string_view s) { size_t num_code_points = 0; // It is not a lambda for compatibility with C++14. struct count_code_points { size_t* count; FMT_CONSTEXPR void operator()(uint32_t cp, int error) const { *count += detail::to_unsigned( 1 + (error == 0 && cp >= 0x1100 && (cp <= 0x115f || // Hangul Jamo init. consonants cp == 0x2329 || // LEFT-POINTING ANGLE BRACKET〈 cp == 0x232a || // RIGHT-POINTING ANGLE BRACKET 〉 // CJK ... 
Yi except Unicode Character “〿”: (cp >= 0x2e80 && cp <= 0xa4cf && cp != 0x303f) || (cp >= 0xac00 && cp <= 0xd7a3) || // Hangul Syllables (cp >= 0xf900 && cp <= 0xfaff) || // CJK Compatibility Ideographs (cp >= 0xfe10 && cp <= 0xfe19) || // Vertical Forms (cp >= 0xfe30 && cp <= 0xfe6f) || // CJK Compatibility Forms (cp >= 0xff00 && cp <= 0xff60) || // Fullwidth Forms (cp >= 0xffe0 && cp <= 0xffe6) || // Fullwidth Forms (cp >= 0x20000 && cp <= 0x2fffd) || // CJK (cp >= 0x30000 && cp <= 0x3fffd) || // Miscellaneous Symbols and Pictographs + Emoticons: (cp >= 0x1f300 && cp <= 0x1f64f) || // Supplemental Symbols and Pictographs: (cp >= 0x1f900 && cp <= 0x1f9ff)))); } }; for_each_codepoint(s, count_code_points{&num_code_points}); return num_code_points; } inline auto compute_width(basic_string_view s) -> size_t { return compute_width(basic_string_view( reinterpret_cast(s.data()), s.size())); } template inline auto code_point_index(basic_string_view s, size_t n) -> size_t { size_t size = s.size(); return n < size ? n : size; } // Calculates the index of the nth code point in a UTF-8 string. 
inline auto code_point_index(basic_string_view s, size_t n) -> size_t { const char8_type* data = s.data(); size_t num_code_points = 0; for (size_t i = 0, size = s.size(); i != size; ++i) { if ((data[i] & 0xc0) != 0x80 && ++num_code_points > n) return i; } return s.size(); } template using is_fast_float = bool_constant::is_iec559 && sizeof(T) <= sizeof(double)>; #ifndef FMT_USE_FULL_CACHE_DRAGONBOX # define FMT_USE_FULL_CACHE_DRAGONBOX 0 #endif template template void buffer::append(const U* begin, const U* end) { while (begin != end) { auto count = to_unsigned(end - begin); try_reserve(size_ + count); auto free_cap = capacity_ - size_; if (free_cap < count) count = free_cap; std::uninitialized_copy_n(begin, count, make_checked(ptr_ + size_, count)); size_ += count; begin += count; } } template struct is_locale : std::false_type {}; template struct is_locale> : std::true_type {}; } // namespace detail FMT_MODULE_EXPORT_BEGIN // The number of characters to store in the basic_memory_buffer object itself // to avoid dynamic memory allocation. enum { inline_buffer_size = 500 }; /** \rst A dynamically growing memory buffer for trivially copyable/constructible types with the first ``SIZE`` elements stored in the object itself. You can use the ```memory_buffer`` type alias for ``char`` instead. **Example**:: fmt::memory_buffer out; format_to(out, "The answer is {}.", 42); This will append the following output to the ``out`` object: .. code-block:: none The answer is 42. The output can be converted to an ``std::string`` with ``to_string(out)``. \endrst */ template > class basic_memory_buffer final : public detail::buffer { private: T store_[SIZE]; // Don't inherit from Allocator avoid generating type_info for it. Allocator alloc_; // Deallocate memory allocated by the buffer. 
void deallocate() { T* data = this->data(); if (data != store_) alloc_.deallocate(data, this->capacity()); } protected: void grow(size_t size) final FMT_OVERRIDE; public: using value_type = T; using const_reference = const T&; explicit basic_memory_buffer(const Allocator& alloc = Allocator()) : alloc_(alloc) { this->set(store_, SIZE); } ~basic_memory_buffer() { deallocate(); } private: // Move data from other to this buffer. void move(basic_memory_buffer& other) { alloc_ = std::move(other.alloc_); T* data = other.data(); size_t size = other.size(), capacity = other.capacity(); if (data == other.store_) { this->set(store_, capacity); std::uninitialized_copy(other.store_, other.store_ + size, detail::make_checked(store_, capacity)); } else { this->set(data, capacity); // Set pointer to the inline array so that delete is not called // when deallocating. other.set(other.store_, 0); } this->resize(size); } public: /** \rst Constructs a :class:`fmt::basic_memory_buffer` object moving the content of the other object to it. \endrst */ basic_memory_buffer(basic_memory_buffer&& other) FMT_NOEXCEPT { move(other); } /** \rst Moves the content of the other ``basic_memory_buffer`` object to this one. \endrst */ auto operator=(basic_memory_buffer&& other) FMT_NOEXCEPT -> basic_memory_buffer& { FMT_ASSERT(this != &other, ""); deallocate(); move(other); return *this; } // Returns a copy of the allocator associated with this buffer. auto get_allocator() const -> Allocator { return alloc_; } /** Resizes the buffer to contain *count* elements. If T is a POD type new elements may not be initialized. */ void resize(size_t count) { this->try_resize(count); } /** Increases the buffer capacity to *new_capacity*. 
*/ void reserve(size_t new_capacity) { this->try_reserve(new_capacity); } // Directly append data into the buffer using detail::buffer::append; template void append(const ContiguousRange& range) { append(range.data(), range.data() + range.size()); } }; template void basic_memory_buffer::grow(size_t size) { #ifdef FMT_FUZZ if (size > 5000) throw std::runtime_error("fuzz mode - won't grow that much"); #endif const size_t max_size = std::allocator_traits::max_size(alloc_); size_t old_capacity = this->capacity(); size_t new_capacity = old_capacity + old_capacity / 2; if (size > new_capacity) new_capacity = size; else if (new_capacity > max_size) new_capacity = size > max_size ? size : max_size; T* old_data = this->data(); T* new_data = std::allocator_traits::allocate(alloc_, new_capacity); // The following code doesn't throw, so the raw pointer above doesn't leak. std::uninitialized_copy(old_data, old_data + this->size(), detail::make_checked(new_data, new_capacity)); this->set(new_data, new_capacity); // deallocate must not throw according to the standard, but even if it does, // the buffer already uses the new storage and will deallocate it in // destructor. if (old_data != store_) alloc_.deallocate(old_data, old_capacity); } using memory_buffer = basic_memory_buffer; template struct is_contiguous> : std::true_type { }; namespace detail { FMT_API void print(std::FILE*, string_view); } /** A formatting error such as invalid format string. 
*/ FMT_CLASS_API class FMT_API format_error : public std::runtime_error { public: explicit format_error(const char* message) : std::runtime_error(message) {} explicit format_error(const std::string& message) : std::runtime_error(message) {} format_error(const format_error&) = default; format_error& operator=(const format_error&) = default; format_error(format_error&&) = default; format_error& operator=(format_error&&) = default; ~format_error() FMT_NOEXCEPT FMT_OVERRIDE FMT_MSC_DEFAULT; }; /** \rst Constructs a `~fmt::format_arg_store` object that contains references to arguments and can be implicitly converted to `~fmt::format_args`. If ``fmt`` is a compile-time string then `make_args_checked` checks its validity at compile time. \endrst */ template > FMT_INLINE auto make_args_checked(const S& fmt, const remove_reference_t&... args) -> format_arg_store, remove_reference_t...> { static_assert( detail::count<( std::is_base_of>::value && std::is_reference::value)...>() == 0, "passing views as lvalues is disallowed"); detail::check_format_string(fmt); return {args...}; } // compile-time support namespace detail_exported { #if FMT_USE_NONTYPE_TEMPLATE_PARAMETERS template struct fixed_string { constexpr fixed_string(const Char (&str)[N]) { detail::copy_str(static_cast(str), str + N, data); } Char data[N]{}; }; #endif // Converts a compile-time string to basic_string_view. template constexpr auto compile_string_to_view(const Char (&s)[N]) -> basic_string_view { // Remove trailing NUL character if needed. Won't be present if this is used // with a raw character array (i.e. not defined as a string). return {s, N - (std::char_traits::to_int_type(s[N - 1]) == 0 ? 
1 : 0)}; } template constexpr auto compile_string_to_view(detail::std_string_view s) -> basic_string_view { return {s.data(), s.size()}; } } // namespace detail_exported FMT_BEGIN_DETAIL_NAMESPACE inline void throw_format_error(const char* message) { FMT_THROW(format_error(message)); } template struct is_integral : std::is_integral {}; template <> struct is_integral : std::true_type {}; template <> struct is_integral : std::true_type {}; template using is_signed = std::integral_constant::is_signed || std::is_same::value>; // Returns true if value is negative, false otherwise. // Same as `value < 0` but doesn't produce warnings if T is an unsigned type. template ::value)> FMT_CONSTEXPR auto is_negative(T value) -> bool { return value < 0; } template ::value)> FMT_CONSTEXPR auto is_negative(T) -> bool { return false; } template ::value)> FMT_CONSTEXPR auto is_supported_floating_point(T) -> uint16_t { return (std::is_same::value && FMT_USE_FLOAT) || (std::is_same::value && FMT_USE_DOUBLE) || (std::is_same::value && FMT_USE_LONG_DOUBLE); } // Smallest of uint32_t, uint64_t, uint128_t that is large enough to // represent all values of an integral type T. template using uint32_or_64_or_128_t = conditional_t() <= 32 && !FMT_REDUCE_INT_INSTANTIATIONS, uint32_t, conditional_t() <= 64, uint64_t, uint128_t>>; template using uint64_or_128_t = conditional_t() <= 64, uint64_t, uint128_t>; #define FMT_POWERS_OF_10(factor) \ factor * 10, (factor)*100, (factor)*1000, (factor)*10000, (factor)*100000, \ (factor)*1000000, (factor)*10000000, (factor)*100000000, \ (factor)*1000000000 // Static data is placed in this class template for the header-only config. template struct basic_data { // log10(2) = 0x0.4d104d427de7fbcc... static const uint64_t log10_2_significand = 0x4d104d427de7fbcc; // GCC generates slightly better code for pairs than chars. 
FMT_API static constexpr const char digits[100][2] = { {'0', '0'}, {'0', '1'}, {'0', '2'}, {'0', '3'}, {'0', '4'}, {'0', '5'}, {'0', '6'}, {'0', '7'}, {'0', '8'}, {'0', '9'}, {'1', '0'}, {'1', '1'}, {'1', '2'}, {'1', '3'}, {'1', '4'}, {'1', '5'}, {'1', '6'}, {'1', '7'}, {'1', '8'}, {'1', '9'}, {'2', '0'}, {'2', '1'}, {'2', '2'}, {'2', '3'}, {'2', '4'}, {'2', '5'}, {'2', '6'}, {'2', '7'}, {'2', '8'}, {'2', '9'}, {'3', '0'}, {'3', '1'}, {'3', '2'}, {'3', '3'}, {'3', '4'}, {'3', '5'}, {'3', '6'}, {'3', '7'}, {'3', '8'}, {'3', '9'}, {'4', '0'}, {'4', '1'}, {'4', '2'}, {'4', '3'}, {'4', '4'}, {'4', '5'}, {'4', '6'}, {'4', '7'}, {'4', '8'}, {'4', '9'}, {'5', '0'}, {'5', '1'}, {'5', '2'}, {'5', '3'}, {'5', '4'}, {'5', '5'}, {'5', '6'}, {'5', '7'}, {'5', '8'}, {'5', '9'}, {'6', '0'}, {'6', '1'}, {'6', '2'}, {'6', '3'}, {'6', '4'}, {'6', '5'}, {'6', '6'}, {'6', '7'}, {'6', '8'}, {'6', '9'}, {'7', '0'}, {'7', '1'}, {'7', '2'}, {'7', '3'}, {'7', '4'}, {'7', '5'}, {'7', '6'}, {'7', '7'}, {'7', '8'}, {'7', '9'}, {'8', '0'}, {'8', '1'}, {'8', '2'}, {'8', '3'}, {'8', '4'}, {'8', '5'}, {'8', '6'}, {'8', '7'}, {'8', '8'}, {'8', '9'}, {'9', '0'}, {'9', '1'}, {'9', '2'}, {'9', '3'}, {'9', '4'}, {'9', '5'}, {'9', '6'}, {'9', '7'}, {'9', '8'}, {'9', '9'}}; FMT_API static constexpr const char hex_digits[] = "0123456789abcdef"; FMT_API static constexpr const char signs[4] = {0, '-', '+', ' '}; FMT_API static constexpr const unsigned prefixes[4] = {0, 0, 0x1000000u | '+', 0x1000000u | ' '}; FMT_API static constexpr const char left_padding_shifts[5] = {31, 31, 0, 1, 0}; FMT_API static constexpr const char right_padding_shifts[5] = {0, 31, 0, 1, 0}; }; #ifdef FMT_SHARED // Required for -flto, -fivisibility=hidden and -shared to work extern template struct basic_data; #endif // This is a struct rather than an alias to avoid shadowing warnings in gcc. 
struct data : basic_data<> {}; template FMT_CONSTEXPR auto count_digits_fallback(T n) -> int { int count = 1; for (;;) { // Integer division is slow so do it for a group of four digits instead // of for every digit. The idea comes from the talk by Alexandrescu // "Three Optimization Tips for C++". See speed-test for a comparison. if (n < 10) return count; if (n < 100) return count + 1; if (n < 1000) return count + 2; if (n < 10000) return count + 3; n /= 10000u; count += 4; } } #if FMT_USE_INT128 FMT_CONSTEXPR inline auto count_digits(uint128_t n) -> int { return count_digits_fallback(n); } #endif // Returns the number of decimal digits in n. Leading zeros are not counted // except for n == 0 in which case count_digits returns 1. FMT_CONSTEXPR20 inline auto count_digits(uint64_t n) -> int { #ifdef FMT_BUILTIN_CLZLL if (!is_constant_evaluated()) { // https://github.com/fmtlib/format-benchmark/blob/master/digits10 // Maps bsr(n) to ceil(log10(pow(2, bsr(n) + 1) - 1)). constexpr uint16_t bsr2log10[] = { 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 9, 9, 9, 10, 10, 10, 10, 11, 11, 11, 12, 12, 12, 13, 13, 13, 13, 14, 14, 14, 15, 15, 15, 16, 16, 16, 16, 17, 17, 17, 18, 18, 18, 19, 19, 19, 19, 20}; auto t = bsr2log10[FMT_BUILTIN_CLZLL(n | 1) ^ 63]; constexpr const uint64_t zero_or_powers_of_10[] = { 0, 0, FMT_POWERS_OF_10(1U), FMT_POWERS_OF_10(1000000000ULL), 10000000000000000000ULL}; return t - (n < zero_or_powers_of_10[t]); } #endif return count_digits_fallback(n); } // Counts the number of digits in n. BITS = log2(radix). 
template FMT_CONSTEXPR auto count_digits(UInt n) -> int { #ifdef FMT_BUILTIN_CLZ if (num_bits() == 32) return (FMT_BUILTIN_CLZ(static_cast(n) | 1) ^ 31) / BITS + 1; #endif int num_digits = 0; do { ++num_digits; } while ((n >>= BITS) != 0); return num_digits; } template <> auto count_digits<4>(detail::fallback_uintptr n) -> int; // It is a separate function rather than a part of count_digits to workaround // the lack of static constexpr in constexpr functions. FMT_INLINE uint64_t count_digits_inc(int n) { // An optimization by Kendall Willets from https://bit.ly/3uOIQrB. // This increments the upper 32 bits (log10(T) - 1) when >= T is added. #define FMT_INC(T) (((sizeof(#T) - 1ull) << 32) - T) static constexpr uint64_t table[] = { FMT_INC(0), FMT_INC(0), FMT_INC(0), // 8 FMT_INC(10), FMT_INC(10), FMT_INC(10), // 64 FMT_INC(100), FMT_INC(100), FMT_INC(100), // 512 FMT_INC(1000), FMT_INC(1000), FMT_INC(1000), // 4096 FMT_INC(10000), FMT_INC(10000), FMT_INC(10000), // 32k FMT_INC(100000), FMT_INC(100000), FMT_INC(100000), // 256k FMT_INC(1000000), FMT_INC(1000000), FMT_INC(1000000), // 2048k FMT_INC(10000000), FMT_INC(10000000), FMT_INC(10000000), // 16M FMT_INC(100000000), FMT_INC(100000000), FMT_INC(100000000), // 128M FMT_INC(1000000000), FMT_INC(1000000000), FMT_INC(1000000000), // 1024M FMT_INC(1000000000), FMT_INC(1000000000) // 4B }; return table[n]; } // Optional version of count_digits for better performance on 32-bit platforms. 
FMT_CONSTEXPR20 inline auto count_digits(uint32_t n) -> int { #ifdef FMT_BUILTIN_CLZ if (!is_constant_evaluated()) { auto inc = count_digits_inc(FMT_BUILTIN_CLZ(n | 1) ^ 31); return static_cast((n + inc) >> 32); } #endif return count_digits_fallback(n); } template constexpr auto digits10() FMT_NOEXCEPT -> int { return std::numeric_limits::digits10; } template <> constexpr auto digits10() FMT_NOEXCEPT -> int { return 38; } template <> constexpr auto digits10() FMT_NOEXCEPT -> int { return 38; } template struct thousands_sep_result { std::string grouping; Char thousands_sep; }; template FMT_API auto thousands_sep_impl(locale_ref loc) -> thousands_sep_result; template inline auto thousands_sep(locale_ref loc) -> thousands_sep_result { auto result = thousands_sep_impl(loc); return {result.grouping, Char(result.thousands_sep)}; } template <> inline auto thousands_sep(locale_ref loc) -> thousands_sep_result { return thousands_sep_impl(loc); } template FMT_API auto decimal_point_impl(locale_ref loc) -> Char; template inline auto decimal_point(locale_ref loc) -> Char { return Char(decimal_point_impl(loc)); } template <> inline auto decimal_point(locale_ref loc) -> wchar_t { return decimal_point_impl(loc); } // Compares two characters for equality. template auto equal2(const Char* lhs, const char* rhs) -> bool { return lhs[0] == rhs[0] && lhs[1] == rhs[1]; } inline auto equal2(const char* lhs, const char* rhs) -> bool { return memcmp(lhs, rhs, 2) == 0; } // Copies two characters from src to dst. template void copy2(Char* dst, const char* src) { *dst++ = static_cast(*src++); *dst = static_cast(*src); } FMT_INLINE void copy2(char* dst, const char* src) { memcpy(dst, src, 2); } template struct format_decimal_result { Iterator begin; Iterator end; }; // Formats a decimal unsigned integer value writing into out pointing to a // buffer of specified size. The caller must ensure that the buffer is large // enough. 
template FMT_CONSTEXPR20 auto format_decimal(Char* out, UInt value, int size) -> format_decimal_result { FMT_ASSERT(size >= count_digits(value), "invalid digit count"); out += size; Char* end = out; if (is_constant_evaluated()) { while (value >= 10) { *--out = static_cast('0' + value % 10); value /= 10; } *--out = static_cast('0' + value); return {out, end}; } while (value >= 100) { // Integer division is slow so do it for a group of two digits instead // of for every digit. The idea comes from the talk by Alexandrescu // "Three Optimization Tips for C++". See speed-test for a comparison. out -= 2; copy2(out, data::digits[value % 100]); value /= 100; } if (value < 10) { *--out = static_cast('0' + value); return {out, end}; } out -= 2; copy2(out, data::digits[value]); return {out, end}; } template >::value)> inline auto format_decimal(Iterator out, UInt value, int size) -> format_decimal_result { // Buffer is large enough to hold all digits (digits10 + 1). Char buffer[digits10() + 1]; auto end = format_decimal(buffer, value, size).end; return {out, detail::copy_str_noinline(buffer, end, out)}; } template FMT_CONSTEXPR auto format_uint(Char* buffer, UInt value, int num_digits, bool upper = false) -> Char* { buffer += num_digits; Char* end = buffer; do { const char* digits = upper ? "0123456789ABCDEF" : data::hex_digits; unsigned digit = (value & ((1 << BASE_BITS) - 1)); *--buffer = static_cast(BASE_BITS < 4 ? 
static_cast('0' + digit) : digits[digit]); } while ((value >>= BASE_BITS) != 0); return end; } template auto format_uint(Char* buffer, detail::fallback_uintptr n, int num_digits, bool = false) -> Char* { auto char_digits = std::numeric_limits::digits / 4; int start = (num_digits + char_digits - 1) / char_digits - 1; if (int start_digits = num_digits % char_digits) { unsigned value = n.value[start--]; buffer = format_uint(buffer, value, start_digits); } for (; start >= 0; --start) { unsigned value = n.value[start]; buffer += char_digits; auto p = buffer; for (int i = 0; i < char_digits; ++i) { unsigned digit = (value & ((1 << BASE_BITS) - 1)); *--p = static_cast(data::hex_digits[digit]); value >>= BASE_BITS; } } return buffer; } template inline auto format_uint(It out, UInt value, int num_digits, bool upper = false) -> It { if (auto ptr = to_pointer(out, to_unsigned(num_digits))) { format_uint(ptr, value, num_digits, upper); return out; } // Buffer should be large enough to hold all digits (digits / BASE_BITS + 1). char buffer[num_bits() / BASE_BITS + 1]; format_uint(buffer, value, num_digits, upper); return detail::copy_str_noinline(buffer, buffer + num_digits, out); } // A converter from UTF-8 to UTF-16. class utf8_to_utf16 { private: basic_memory_buffer buffer_; public: FMT_API explicit utf8_to_utf16(string_view s); operator basic_string_view() const { return {&buffer_[0], size()}; } auto size() const -> size_t { return buffer_.size() - 1; } auto c_str() const -> const wchar_t* { return &buffer_[0]; } auto str() const -> std::wstring { return {&buffer_[0], size()}; } }; namespace dragonbox { // Type-specific information that Dragonbox uses. 
template struct float_info; template <> struct float_info { using carrier_uint = uint32_t; static const int significand_bits = 23; static const int exponent_bits = 8; static const int min_exponent = -126; static const int max_exponent = 127; static const int exponent_bias = -127; static const int decimal_digits = 9; static const int kappa = 1; static const int big_divisor = 100; static const int small_divisor = 10; static const int min_k = -31; static const int max_k = 46; static const int cache_bits = 64; static const int divisibility_check_by_5_threshold = 39; static const int case_fc_pm_half_lower_threshold = -1; static const int case_fc_pm_half_upper_threshold = 6; static const int case_fc_lower_threshold = -2; static const int case_fc_upper_threshold = 6; static const int case_shorter_interval_left_endpoint_lower_threshold = 2; static const int case_shorter_interval_left_endpoint_upper_threshold = 3; static const int shorter_interval_tie_lower_threshold = -35; static const int shorter_interval_tie_upper_threshold = -35; static const int max_trailing_zeros = 7; }; template <> struct float_info { using carrier_uint = uint64_t; static const int significand_bits = 52; static const int exponent_bits = 11; static const int min_exponent = -1022; static const int max_exponent = 1023; static const int exponent_bias = -1023; static const int decimal_digits = 17; static const int kappa = 2; static const int big_divisor = 1000; static const int small_divisor = 100; static const int min_k = -292; static const int max_k = 326; static const int cache_bits = 128; static const int divisibility_check_by_5_threshold = 86; static const int case_fc_pm_half_lower_threshold = -2; static const int case_fc_pm_half_upper_threshold = 9; static const int case_fc_lower_threshold = -4; static const int case_fc_upper_threshold = 9; static const int case_shorter_interval_left_endpoint_lower_threshold = 2; static const int case_shorter_interval_left_endpoint_upper_threshold = 3; static const 
int shorter_interval_tie_lower_threshold = -77; static const int shorter_interval_tie_upper_threshold = -77; static const int max_trailing_zeros = 16; }; template struct decimal_fp { using significand_type = typename float_info::carrier_uint; significand_type significand; int exponent; }; template FMT_API auto to_decimal(T x) FMT_NOEXCEPT -> decimal_fp; } // namespace dragonbox template constexpr auto exponent_mask() -> typename dragonbox::float_info::carrier_uint { using uint = typename dragonbox::float_info::carrier_uint; return ((uint(1) << dragonbox::float_info::exponent_bits) - 1) << dragonbox::float_info::significand_bits; } // Writes the exponent exp in the form "[+-]d{2,3}" to buffer. template auto write_exponent(int exp, It it) -> It { FMT_ASSERT(-10000 < exp && exp < 10000, "exponent out of range"); if (exp < 0) { *it++ = static_cast('-'); exp = -exp; } else { *it++ = static_cast('+'); } if (exp >= 100) { const char* top = data::digits[exp / 100]; if (exp >= 1000) *it++ = static_cast(top[0]); *it++ = static_cast(top[1]); exp %= 100; } const char* d = data::digits[exp]; *it++ = static_cast(d[0]); *it++ = static_cast(d[1]); return it; } template auto format_float(T value, int precision, float_specs specs, buffer& buf) -> int; // Formats a floating-point number with snprintf. template auto snprintf_float(T value, int precision, float_specs specs, buffer& buf) -> int; template auto promote_float(T value) -> T { return value; } inline auto promote_float(float value) -> double { return static_cast(value); } template FMT_NOINLINE FMT_CONSTEXPR auto fill(OutputIt it, size_t n, const fill_t& fill) -> OutputIt { auto fill_size = fill.size(); if (fill_size == 1) return detail::fill_n(it, n, fill[0]); auto data = fill.data(); for (size_t i = 0; i < n; ++i) it = copy_str(data, data + fill_size, it); return it; } // Writes the output of f, padded according to format specifications in specs. // size: output size in code units. 
// width: output display width in (terminal) column positions. template FMT_CONSTEXPR auto write_padded(OutputIt out, const basic_format_specs& specs, size_t size, size_t width, F&& f) -> OutputIt { static_assert(align == align::left || align == align::right, ""); unsigned spec_width = to_unsigned(specs.width); size_t padding = spec_width > width ? spec_width - width : 0; auto* shifts = align == align::left ? data::left_padding_shifts : data::right_padding_shifts; size_t left_padding = padding >> shifts[specs.align]; size_t right_padding = padding - left_padding; auto it = reserve(out, size + padding * specs.fill.size()); if (left_padding != 0) it = fill(it, left_padding, specs.fill); it = f(it); if (right_padding != 0) it = fill(it, right_padding, specs.fill); return base_iterator(out, it); } template constexpr auto write_padded(OutputIt out, const basic_format_specs& specs, size_t size, F&& f) -> OutputIt { return write_padded(out, specs, size, size, f); } template FMT_CONSTEXPR auto write_bytes(OutputIt out, string_view bytes, const basic_format_specs& specs) -> OutputIt { return write_padded( out, specs, bytes.size(), [bytes](reserve_iterator it) { const char* data = bytes.data(); return copy_str(data, data + bytes.size(), it); }); } template auto write_ptr(OutputIt out, UIntPtr value, const basic_format_specs* specs) -> OutputIt { int num_digits = count_digits<4>(value); auto size = to_unsigned(num_digits) + size_t(2); auto write = [=](reserve_iterator it) { *it++ = static_cast('0'); *it++ = static_cast('x'); return format_uint<4, Char>(it, value, num_digits); }; return specs ? 
// ---------------------------------------------------------------------------
// Character, integer, string and non-finite writers.
//
// NOTE(review): in this copy the template parameter lists and angle-bracket
// arguments appear to be missing (bare `template`, `static_cast(...)`,
// `std::is_same::value`), and several declarations share one physical line
// with `//` comments. Compare against the upstream header before compiling.
//
// The first statement fragment below is the tail of a definition that begins
// above this span; it is left untouched.
//
// Definitions in this span, in order of appearance:
//
// write_char(out, value, specs)
//   Emits the single character `value` through write_padded with a content
//   size of 1.
//
// write(out, Char value, specs, loc)
//   Calls write_char when check_char_specs(specs) is true; otherwise casts
//   `value` and forwards it, with `specs` and `loc`, to another write overload.
//
// write_int_data
//   Holds the total `size` and the zero `padding` for an integer. `size`
//   starts as the prefix length (top byte of `prefix`, i.e. prefix >> 24) plus
//   num_digits. With align::numeric and width > size, the difference becomes
//   padding and size becomes width. Otherwise, when precision > num_digits,
//   size becomes prefix length + precision and padding becomes
//   precision - num_digits.
//
// write_int(out, num_digits, prefix, specs, write_digits)
//   When width == 0 and precision == -1 it reserves exactly
//   num_digits + prefix length, emits the prefix characters from the low
//   three bytes of `prefix` (lowest byte first), then calls write_digits.
//   Otherwise it builds write_int_data and goes through write_padded, emitting
//   the prefix, `padding` '0' characters, then the digits.
//
// write_int_localized(out, value, prefix, specs, loc) -> bool
//   Returns false without writing when thousands_sep(loc) reports no
//   separator. Otherwise it counts the separators implied by ts.grouping,
//   formats the decimal digits into a local char[40], fills a buffer from the
//   end inserting the separator at group boundaries, stores one prefix
//   character in front when prefix != 0, writes the buffer through
//   write_padded into `out`, and returns true.
//
// prefix_append(prefix, value)
//   ORs `value` into `prefix` (shifted left by 8 bits when `prefix` is already
//   non-zero) and adds 1 to the size stored in the top byte, or 2 when
//   value > 0xff.
//
// write_int_arg / make_write_int_arg(value, sign)
//   Pair of absolute value and prefix. For a negative `value` the prefix is
//   0x01000000 | '-' and abs_value is negated via `0 - abs_value`; otherwise
//   the prefix is data::prefixes[sign].
//
// write_int(out, arg, specs, loc)
//   Dispatches on specs.type:
//     0, 'd'    decimal; tries write_int_localized first when specs.localized
//     'x', 'X'  hexadecimal; with specs.alt appends '0' and the type character
//     'b', 'B'  binary; with specs.alt appends '0' and the type character
//     'o'       octal; with specs.alt appends '0' only when
//               precision <= num_digits and abs_value != 0
//     'c'       forwards to write_char
//     other     FMT_THROW(format_error("invalid type specifier"))
//
// write(out, T value, specs, loc)   [two overloads]
//   Both forward to write_int(out, make_write_int_arg(value, specs.sign),
//   specs, loc); the second is marked FMT_INLINE.
//
// write(out, basic_string_view s, specs)
//   When precision >= 0 and smaller than the size, the size is cut to
//   code_point_index(s, precision). The display width is computed with
//   compute_width only when specs.width != 0. Output goes through
//   write_padded.
//
// write(out, basic_string_view s, specs, locale_ref)
//   Ignores the locale and forwards to the overload above.
//
// write(out, const Char* s, specs, locale_ref)
//   Writes `s` as a string view when check_cstring_type_spec(specs.type) is
//   true, otherwise as a pointer via write_ptr(out, to_uintptr(s), &specs).
//
// write_nonfinite(out, isinf, specs, fspecs)
//   Writes "inf"/"nan" ("INF"/"NAN" when fspecs.upper), preceded by the sign
//   character when fspecs.sign is set. A single-character '0' fill is replaced
//   with ' ' before padding.
// ---------------------------------------------------------------------------
write_padded(out, *specs, size, write) : base_iterator(out, write(reserve(out, size))); } template FMT_CONSTEXPR auto write_char(OutputIt out, Char value, const basic_format_specs& specs) -> OutputIt { return write_padded(out, specs, 1, [=](reserve_iterator it) { *it++ = value; return it; }); } template FMT_CONSTEXPR auto write(OutputIt out, Char value, const basic_format_specs& specs, locale_ref loc = {}) -> OutputIt { return check_char_specs(specs) ? write_char(out, value, specs) : write(out, static_cast(value), specs, loc); } // Data for write_int that doesn't depend on output iterator type. It is used to // avoid template code bloat. template struct write_int_data { size_t size; size_t padding; FMT_CONSTEXPR write_int_data(int num_digits, unsigned prefix, const basic_format_specs& specs) : size((prefix >> 24) + to_unsigned(num_digits)), padding(0) { if (specs.align == align::numeric) { auto width = to_unsigned(specs.width); if (width > size) { padding = width - size; size = width; } } else if (specs.precision > num_digits) { size = (prefix >> 24) + to_unsigned(specs.precision); padding = to_unsigned(specs.precision - num_digits); } } }; // Writes an integer in the format // // where are written by write_digits(it). // prefix contains chars in three lower bytes and the size in the fourth byte. template FMT_CONSTEXPR FMT_INLINE auto write_int(OutputIt out, int num_digits, unsigned prefix, const basic_format_specs& specs, W write_digits) -> OutputIt { // Slightly faster check for specs.width == 0 && specs.precision == -1. 
if ((specs.width | (specs.precision + 1)) == 0) { auto it = reserve(out, to_unsigned(num_digits) + (prefix >> 24)); if (prefix != 0) { for (unsigned p = prefix & 0xffffff; p != 0; p >>= 8) *it++ = static_cast(p & 0xff); } return base_iterator(out, write_digits(it)); } auto data = write_int_data(num_digits, prefix, specs); return write_padded( out, specs, data.size, [=](reserve_iterator it) { for (unsigned p = prefix & 0xffffff; p != 0; p >>= 8) *it++ = static_cast(p & 0xff); it = detail::fill_n(it, data.padding, static_cast('0')); return write_digits(it); }); } template auto write_int_localized(OutputIt& out, UInt value, unsigned prefix, const basic_format_specs& specs, locale_ref loc) -> bool { static_assert(std::is_same, UInt>::value, ""); const auto sep_size = 1; auto ts = thousands_sep(loc); if (!ts.thousands_sep) return false; int num_digits = count_digits(value); int size = num_digits, n = num_digits; const std::string& groups = ts.grouping; std::string::const_iterator group = groups.cbegin(); while (group != groups.cend() && n > *group && *group > 0 && *group != max_value()) { size += sep_size; n -= *group; ++group; } if (group == groups.cend()) size += sep_size * ((n - 1) / groups.back()); char digits[40]; format_decimal(digits, value, num_digits); basic_memory_buffer buffer; if (prefix != 0) ++size; const auto usize = to_unsigned(size); buffer.resize(usize); basic_string_view s(&ts.thousands_sep, sep_size); // Index of a decimal digit with the least significant digit having index 0. 
int digit_index = 0; group = groups.cbegin(); auto p = buffer.data() + size - 1; for (int i = num_digits - 1; i > 0; --i) { *p-- = static_cast(digits[i]); if (*group <= 0 || ++digit_index % *group != 0 || *group == max_value()) continue; if (group + 1 != groups.cend()) { digit_index = 0; ++group; } std::uninitialized_copy(s.data(), s.data() + s.size(), make_checked(p, s.size())); p -= s.size(); } *p-- = static_cast(*digits); if (prefix != 0) *p = static_cast(prefix); auto data = buffer.data(); out = write_padded( out, specs, usize, usize, [=](reserve_iterator it) { return copy_str(data, data + size, it); }); return true; } FMT_CONSTEXPR inline void prefix_append(unsigned& prefix, unsigned value) { prefix |= prefix != 0 ? value << 8 : value; prefix += (1u + (value > 0xff ? 1 : 0)) << 24; } template struct write_int_arg { UInt abs_value; unsigned prefix; }; template FMT_CONSTEXPR auto make_write_int_arg(T value, sign_t sign) -> write_int_arg> { auto prefix = 0u; auto abs_value = static_cast>(value); if (is_negative(value)) { prefix = 0x01000000 | '-'; abs_value = 0 - abs_value; } else { prefix = data::prefixes[sign]; } return {abs_value, prefix}; } template FMT_CONSTEXPR FMT_INLINE auto write_int(OutputIt out, write_int_arg arg, const basic_format_specs& specs, locale_ref loc) -> OutputIt { static_assert(std::is_same>::value, ""); auto abs_value = arg.abs_value; auto prefix = arg.prefix; auto utype = static_cast(specs.type); switch (specs.type) { case 0: case 'd': { if (specs.localized && write_int_localized(out, static_cast>(abs_value), prefix, specs, loc)) { return out; } auto num_digits = count_digits(abs_value); return write_int( out, num_digits, prefix, specs, [=](reserve_iterator it) { return format_decimal(it, abs_value, num_digits).end; }); } case 'x': case 'X': { if (specs.alt) prefix_append(prefix, (utype << 8) | '0'); bool upper = specs.type != 'x'; int num_digits = count_digits<4>(abs_value); return write_int( out, num_digits, prefix, specs, 
[=](reserve_iterator it) { return format_uint<4, Char>(it, abs_value, num_digits, upper); }); } case 'b': case 'B': { if (specs.alt) prefix_append(prefix, (utype << 8) | '0'); int num_digits = count_digits<1>(abs_value); return write_int(out, num_digits, prefix, specs, [=](reserve_iterator it) { return format_uint<1, Char>(it, abs_value, num_digits); }); } case 'o': { int num_digits = count_digits<3>(abs_value); if (specs.alt && specs.precision <= num_digits && abs_value != 0) { // Octal prefix '0' is counted as a digit, so only add it if precision // is not greater than the number of digits. prefix_append(prefix, '0'); } return write_int(out, num_digits, prefix, specs, [=](reserve_iterator it) { return format_uint<3, Char>(it, abs_value, num_digits); }); } case 'c': return write_char(out, static_cast(abs_value), specs); default: FMT_THROW(format_error("invalid type specifier")); } return out; } template ::value && !std::is_same::value && std::is_same>::value)> FMT_CONSTEXPR auto write(OutputIt out, T value, const basic_format_specs& specs, locale_ref loc) -> OutputIt { return write_int(out, make_write_int_arg(value, specs.sign), specs, loc); } // An inlined version of write used in format string compilation. template ::value && !std::is_same::value && !std::is_same>::value)> FMT_CONSTEXPR FMT_INLINE auto write(OutputIt out, T value, const basic_format_specs& specs, locale_ref loc) -> OutputIt { return write_int(out, make_write_int_arg(value, specs.sign), specs, loc); } template FMT_CONSTEXPR auto write(OutputIt out, basic_string_view s, const basic_format_specs& specs) -> OutputIt { auto data = s.data(); auto size = s.size(); if (specs.precision >= 0 && to_unsigned(specs.precision) < size) size = code_point_index(s, to_unsigned(specs.precision)); auto width = specs.width != 0 ? 
compute_width(basic_string_view(data, size)) : 0; return write_padded(out, specs, size, width, [=](reserve_iterator it) { return copy_str(data, data + size, it); }); } template FMT_CONSTEXPR auto write(OutputIt out, basic_string_view> s, const basic_format_specs& specs, locale_ref) -> OutputIt { return write(out, s, specs); } template FMT_CONSTEXPR auto write(OutputIt out, const Char* s, const basic_format_specs& specs, locale_ref) -> OutputIt { return check_cstring_type_spec(specs.type) ? write(out, basic_string_view(s), specs, {}) : write_ptr(out, to_uintptr(s), &specs); } template auto write_nonfinite(OutputIt out, bool isinf, basic_format_specs specs, const float_specs& fspecs) -> OutputIt { auto str = isinf ? (fspecs.upper ? "INF" : "inf") : (fspecs.upper ? "NAN" : "nan"); constexpr size_t str_size = 3; auto sign = fspecs.sign; auto size = str_size + (sign ? 1 : 0); // Replace '0'-padding with space for non-finite values. const bool is_zero_fill = specs.fill.size() == 1 && *specs.fill.data() == static_cast('0'); if (is_zero_fill) specs.fill[0] = static_cast(' '); return write_padded(out, specs, size, [=](reserve_iterator it) { if (sign) *it++ = static_cast(data::signs[sign]); return copy_str(str, str + str_size, it); }); } // A decimal floating-point number significand * pow(10, exp). 
// ---------------------------------------------------------------------------
// Floating-point writers, spec-less `write` overloads, and argument visitors.
//
// NOTE(review): in this copy the template parameter lists and angle-bracket
// arguments appear to be missing (bare `template`, `static_cast(...)`,
// `digits10()`), and several declarations share one physical line with `//`
// comments. Compare against the upstream header before compiling. The final
// `template` keyword in this span starts a declaration that continues below.
//
// Definitions in this span, in order of appearance:
//
// big_decimal_fp
//   Significand as a character pointer plus its length, and a decimal
//   exponent.
//
// get_significand_size(fp)   [two overloads]
//   Returns fp.significand_size for big_decimal_fp, or
//   count_digits(fp.significand) for dragonbox::decimal_fp.
//
// write_significand(...)   [five overloads]
//   (out, const char*, size)        copies `size` characters.
//   (out, UInt, size)               formats the integer with format_decimal.
//   (Char* out, UInt, size, integral_size, decimal_point)
//       With no decimal point, formats directly. Otherwise formats one slot
//       to the right, shifts the first `integral_size` digits left by one and
//       stores the decimal point at out[integral_size].
//   (OutputIt out, UInt, size, integral_size, decimal_point)
//       Formats into a local Char buffer via the pointer overload, then
//       copies the result to `out`.
//   (out, const char*, size, integral_size, decimal_point)
//       Copies the integral digits, then, if decimal_point is non-zero, the
//       point and the remaining digits.
//
// write_float(out, fp, specs, fspecs, decimal_point)
//   Picks the layout for an already-converted decimal value.
//   Exponent notation is used for float_format::exp, and for
//   float_format::general when output_exp < -4 or
//   output_exp >= (precision > 0 ? precision : 16). There the exponent gets
//   2 digits, 3 when |exp| >= 100 and 4 when |exp| >= 1000, and padding is
//   applied only when specs.width > 0.
//   Otherwise one of three fixed layouts is chosen from fp.exponent and
//   exp = fp.exponent + significand_size:
//     fp.exponent >= 0   digits followed by zeros, optional ".0..."
//     exp > 0            decimal point inside the digits
//     otherwise          "0." followed by -exp zeros and the digits
//
// write(out, T value, specs, loc)   [floating point, with specs]
//   Returns `out` unchanged when is_supported_floating_point(value) is false.
//   The sign comes from std::signbit; sign::minus on a non-negative value is
//   reset to sign::none. Non-finite values go to write_nonfinite. With
//   align::numeric and a sign present, the sign is written first and
//   specs.width is reduced by one when non-zero. float_format::hex is produced
//   by snprintf_float and written with write_bytes. Precision defaults to 6
//   when specs.precision < 0 and specs.type is set. For float_format::exp,
//   precision is incremented, or format_error("number is too big") is thrown
//   when it already equals max_value(). The decimal point comes from
//   decimal_point(loc) when fspecs.locale is set, else '.'.
//
// write(out, T value)   [floating point, no specs; two overloads]
//   The first tests the sign bit and the exponent mask on the bit pattern from
//   bit_cast, sends non-finite values to write_nonfinite, and otherwise
//   converts with dragonbox::to_decimal and calls write_float with '.'.
//   The second forwards to the specs overload with default specs.
//
// write(out, monostate, ...)
//   Executes FMT_ASSERT(false, "") and returns `out`.
//
// write(out, basic_string_view value)
//   Reserves value.size() and copies the characters.
//
// write(out, const T& value)   [string-like]
//   Forwards to_string_view(value) to the overload above.
//
// write(out, T value)   [integer, no specs]
//   Negates via ~abs_value + 1 (see the sanitizer comment in the body). When
//   to_pointer(it, size) yields a pointer it formats in place and returns
//   `out`; otherwise it writes through the reserved iterator.
//
// write(out, T value)   [enum]
//   Casts to another type (cast target not visible in this copy) and forwards.
//
// write(out, T value, specs, locale_ref)   [bool]
//   When specs.type is set and is not 's' it writes 1 or 0 as an integer;
//   otherwise it writes "true" or "false" with write_bytes.
//
// write(out, Char value)
//   Reserves one slot and stores the character.
//
// write(out, const Char* value)
//   Throws format_error("string pointer is null") for a null pointer;
//   otherwise measures with std::char_traits and writes the string view.
//
// write(out, const T* value, specs, locale_ref)
//   Validates specs.type with check_pointer_type_spec, then calls write_ptr.
//
// write(out, const T& value)   [custom type]
//   Selects a formatter type, constructs a context around `out`, and returns
//   formatter_type().format(value, ctx).
//
// default_arg_formatter
//   Visitor: operator()(T) calls write(out, value); the handle overload builds
//   an empty parse context and a format context and calls h.format.
//
// arg_formatter
//   Visitor: operator()(T) calls detail::write(out, value, specs, locale); the
//   handle overload returns `out` unchanged.
//
// custom_formatter
//   Visitor: the handle overload calls h.format(parse_ctx, ctx); the generic
//   overload does nothing.
//
// is_integer
//   bool_constant built from an integral-style check and three is_same
//   exclusions (the excluded types are not visible in this copy).
//
// width_checker / precision_checker
//   Visitors returning the value as unsigned long long. They report
//   "negative width" / "negative precision" through handler_.on_error for
//   negative values, and "width is not integer" / "precision is not integer"
//   (returning 0) for arguments rejected by the enable-if condition.
// ---------------------------------------------------------------------------
struct big_decimal_fp { const char* significand; int significand_size; int exponent; }; inline auto get_significand_size(const big_decimal_fp& fp) -> int { return fp.significand_size; } template inline auto get_significand_size(const dragonbox::decimal_fp& fp) -> int { return count_digits(fp.significand); } template inline auto write_significand(OutputIt out, const char* significand, int& significand_size) -> OutputIt { return copy_str(significand, significand + significand_size, out); } template inline auto write_significand(OutputIt out, UInt significand, int significand_size) -> OutputIt { return format_decimal(out, significand, significand_size).end; } template ::value)> inline auto write_significand(Char* out, UInt significand, int significand_size, int integral_size, Char decimal_point) -> Char* { if (!decimal_point) return format_decimal(out, significand, significand_size).end; auto end = format_decimal(out + 1, significand, significand_size).end; if (integral_size == 1) { out[0] = out[1]; } else { std::uninitialized_copy_n(out + 1, integral_size, make_checked(out, to_unsigned(integral_size))); } out[integral_size] = decimal_point; return end; } template >::value)> inline auto write_significand(OutputIt out, UInt significand, int significand_size, int integral_size, Char decimal_point) -> OutputIt { // Buffer is large enough to hold digits (digits10 + 1) and a decimal point. 
Char buffer[digits10() + 2]; auto end = write_significand(buffer, significand, significand_size, integral_size, decimal_point); return detail::copy_str_noinline(buffer, end, out); } template inline auto write_significand(OutputIt out, const char* significand, int significand_size, int integral_size, Char decimal_point) -> OutputIt { out = detail::copy_str_noinline(significand, significand + integral_size, out); if (!decimal_point) return out; *out++ = decimal_point; return detail::copy_str_noinline(significand + integral_size, significand + significand_size, out); } template auto write_float(OutputIt out, const DecimalFP& fp, const basic_format_specs& specs, float_specs fspecs, Char decimal_point) -> OutputIt { auto significand = fp.significand; int significand_size = get_significand_size(fp); static const Char zero = static_cast('0'); auto sign = fspecs.sign; size_t size = to_unsigned(significand_size) + (sign ? 1 : 0); using iterator = reserve_iterator; int output_exp = fp.exponent + significand_size - 1; auto use_exp_format = [=]() { if (fspecs.format == float_format::exp) return true; if (fspecs.format != float_format::general) return false; // Use the fixed notation if the exponent is in [exp_lower, exp_upper), // e.g. 0.0001 instead of 1e-04. Otherwise use the exponent notation. const int exp_lower = -4, exp_upper = 16; return output_exp < exp_lower || output_exp >= (fspecs.precision > 0 ? fspecs.precision : exp_upper); }; if (use_exp_format()) { int num_zeros = 0; if (fspecs.showpoint) { num_zeros = fspecs.precision - significand_size; if (num_zeros < 0) num_zeros = 0; size += to_unsigned(num_zeros); } else if (significand_size == 1) { decimal_point = Char(); } auto abs_output_exp = output_exp >= 0 ? output_exp : -output_exp; int exp_digits = 2; if (abs_output_exp >= 100) exp_digits = abs_output_exp >= 1000 ? 4 : 3; size += to_unsigned((decimal_point ? 1 : 0) + 2 + exp_digits); char exp_char = fspecs.upper ? 
'E' : 'e'; auto write = [=](iterator it) { if (sign) *it++ = static_cast(data::signs[sign]); // Insert a decimal point after the first digit and add an exponent. it = write_significand(it, significand, significand_size, 1, decimal_point); if (num_zeros > 0) it = detail::fill_n(it, num_zeros, zero); *it++ = static_cast(exp_char); return write_exponent(output_exp, it); }; return specs.width > 0 ? write_padded(out, specs, size, write) : base_iterator(out, write(reserve(out, size))); } int exp = fp.exponent + significand_size; if (fp.exponent >= 0) { // 1234e5 -> 123400000[.0+] size += to_unsigned(fp.exponent); int num_zeros = fspecs.precision - exp; #ifdef FMT_FUZZ if (num_zeros > 5000) throw std::runtime_error("fuzz mode - avoiding excessive cpu use"); #endif if (fspecs.showpoint) { if (num_zeros <= 0 && fspecs.format != float_format::fixed) num_zeros = 1; if (num_zeros > 0) size += to_unsigned(num_zeros) + 1; } return write_padded(out, specs, size, [&](iterator it) { if (sign) *it++ = static_cast(data::signs[sign]); it = write_significand(it, significand, significand_size); it = detail::fill_n(it, fp.exponent, zero); if (!fspecs.showpoint) return it; *it++ = decimal_point; return num_zeros > 0 ? detail::fill_n(it, num_zeros, zero) : it; }); } else if (exp > 0) { // 1234e-2 -> 12.34[0+] int num_zeros = fspecs.showpoint ? fspecs.precision - significand_size : 0; size += 1 + to_unsigned(num_zeros > 0 ? num_zeros : 0); return write_padded(out, specs, size, [&](iterator it) { if (sign) *it++ = static_cast(data::signs[sign]); it = write_significand(it, significand, significand_size, exp, decimal_point); return num_zeros > 0 ? detail::fill_n(it, num_zeros, zero) : it; }); } // 1234e-6 -> 0.001234 int num_zeros = -exp; if (significand_size == 0 && fspecs.precision >= 0 && fspecs.precision < num_zeros) { num_zeros = fspecs.precision; } bool pointy = num_zeros != 0 || significand_size != 0 || fspecs.showpoint; size += 1 + (pointy ? 
1 : 0) + to_unsigned(num_zeros); return write_padded(out, specs, size, [&](iterator it) { if (sign) *it++ = static_cast(data::signs[sign]); *it++ = zero; if (!pointy) return it; *it++ = decimal_point; it = detail::fill_n(it, num_zeros, zero); return write_significand(it, significand, significand_size); }); } template ::value)> auto write(OutputIt out, T value, basic_format_specs specs, locale_ref loc = {}) -> OutputIt { if (const_check(!is_supported_floating_point(value))) return out; float_specs fspecs = parse_float_type_spec(specs); fspecs.sign = specs.sign; if (std::signbit(value)) { // value < 0 is false for NaN so use signbit. fspecs.sign = sign::minus; value = -value; } else if (fspecs.sign == sign::minus) { fspecs.sign = sign::none; } if (!std::isfinite(value)) return write_nonfinite(out, std::isinf(value), specs, fspecs); if (specs.align == align::numeric && fspecs.sign) { auto it = reserve(out, 1); *it++ = static_cast(data::signs[fspecs.sign]); out = base_iterator(out, it); fspecs.sign = sign::none; if (specs.width != 0) --specs.width; } memory_buffer buffer; if (fspecs.format == float_format::hex) { if (fspecs.sign) buffer.push_back(data::signs[fspecs.sign]); snprintf_float(promote_float(value), specs.precision, fspecs, buffer); return write_bytes(out, {buffer.data(), buffer.size()}, specs); } int precision = specs.precision >= 0 || !specs.type ? specs.precision : 6; if (fspecs.format == float_format::exp) { if (precision == max_value()) FMT_THROW(format_error("number is too big")); else ++precision; } if (const_check(std::is_same())) fspecs.binary32 = true; fspecs.use_grisu = is_fast_float(); int exp = format_float(promote_float(value), precision, fspecs, buffer); fspecs.precision = precision; Char point = fspecs.locale ? 
decimal_point(loc) : static_cast('.'); auto fp = big_decimal_fp{buffer.data(), static_cast(buffer.size()), exp}; return write_float(out, fp, specs, fspecs, point); } template ::value)> auto write(OutputIt out, T value) -> OutputIt { if (const_check(!is_supported_floating_point(value))) return out; using floaty = conditional_t::value, double, T>; using uint = typename dragonbox::float_info::carrier_uint; auto bits = bit_cast(value); auto fspecs = float_specs(); auto sign_bit = bits & (uint(1) << (num_bits() - 1)); if (sign_bit != 0) { fspecs.sign = sign::minus; value = -value; } static const auto specs = basic_format_specs(); uint mask = exponent_mask(); if ((bits & mask) == mask) return write_nonfinite(out, std::isinf(value), specs, fspecs); auto dec = dragonbox::to_decimal(static_cast(value)); return write_float(out, dec, specs, fspecs, static_cast('.')); } template ::value && !is_fast_float::value)> inline auto write(OutputIt out, T value) -> OutputIt { return write(out, value, basic_format_specs()); } template auto write(OutputIt out, monostate, basic_format_specs = {}, locale_ref = {}) -> OutputIt { FMT_ASSERT(false, ""); return out; } template FMT_CONSTEXPR auto write(OutputIt out, basic_string_view value) -> OutputIt { auto it = reserve(out, value.size()); it = copy_str_noinline(value.begin(), value.end(), it); return base_iterator(out, it); } template ::value)> constexpr auto write(OutputIt out, const T& value) -> OutputIt { return write(out, to_string_view(value)); } template ::value && !std::is_same::value && !std::is_same::value)> FMT_CONSTEXPR auto write(OutputIt out, T value) -> OutputIt { auto abs_value = static_cast>(value); bool negative = is_negative(value); // Don't do -abs_value since it trips unsigned-integer-overflow sanitizer. if (negative) abs_value = ~abs_value + 1; int num_digits = count_digits(abs_value); auto size = (negative ? 
1 : 0) + static_cast(num_digits); auto it = reserve(out, size); if (auto ptr = to_pointer(it, size)) { if (negative) *ptr++ = static_cast('-'); format_decimal(ptr, abs_value, num_digits); return out; } if (negative) *it++ = static_cast('-'); it = format_decimal(it, abs_value, num_digits).end; return base_iterator(out, it); } // FMT_ENABLE_IF() condition separated to workaround MSVC bug template < typename Char, typename OutputIt, typename T, bool check = std::is_enum::value && !std::is_same::value && mapped_type_constant>::value != type::custom_type, FMT_ENABLE_IF(check)> FMT_CONSTEXPR auto write(OutputIt out, T value) -> OutputIt { return write( out, static_cast::type>(value)); } template ::value)> FMT_CONSTEXPR auto write(OutputIt out, T value, const basic_format_specs& specs = {}, locale_ref = {}) -> OutputIt { return specs.type && specs.type != 's' ? write(out, value ? 1 : 0, specs, {}) : write_bytes(out, value ? "true" : "false", specs); } template FMT_CONSTEXPR auto write(OutputIt out, Char value) -> OutputIt { auto it = reserve(out, 1); *it++ = value; return base_iterator(out, it); } template FMT_CONSTEXPR_CHAR_TRAITS auto write(OutputIt out, const Char* value) -> OutputIt { if (!value) { FMT_THROW(format_error("string pointer is null")); } else { auto length = std::char_traits::length(value); out = write(out, basic_string_view(value, length)); } return out; } template ::value)> auto write(OutputIt out, const T* value, const basic_format_specs& specs = {}, locale_ref = {}) -> OutputIt { check_pointer_type_spec(specs.type, error_handler()); return write_ptr(out, to_uintptr(value), &specs); } template FMT_CONSTEXPR auto write(OutputIt out, const T& value) -> typename std::enable_if< mapped_type_constant>::value == type::custom_type, OutputIt>::type { using context_type = basic_format_context; using formatter_type = conditional_t::value, typename context_type::template formatter_type, fallback_formatter>; context_type ctx(out, {}, {}); return 
formatter_type().format(value, ctx); } // An argument visitor that formats the argument and writes it via the output // iterator. It's a class and not a generic lambda for compatibility with C++11. template struct default_arg_formatter { using iterator = buffer_appender; using context = buffer_context; iterator out; basic_format_args args; locale_ref loc; template auto operator()(T value) -> iterator { return write(out, value); } auto operator()(typename basic_format_arg::handle h) -> iterator { basic_format_parse_context parse_ctx({}); context format_ctx(out, args, loc); h.format(parse_ctx, format_ctx); return format_ctx.out(); } }; template struct arg_formatter { using iterator = buffer_appender; using context = buffer_context; iterator out; const basic_format_specs& specs; locale_ref locale; template FMT_CONSTEXPR FMT_INLINE auto operator()(T value) -> iterator { return detail::write(out, value, specs, locale); } auto operator()(typename basic_format_arg::handle) -> iterator { // User-defined types are handled separately because they require access // to the parse context. 
return out; } }; template struct custom_formatter { basic_format_parse_context& parse_ctx; buffer_context& ctx; void operator()( typename basic_format_arg>::handle h) const { h.format(parse_ctx, ctx); } template void operator()(T) const {} }; template using is_integer = bool_constant::value && !std::is_same::value && !std::is_same::value && !std::is_same::value>; template class width_checker { public: explicit FMT_CONSTEXPR width_checker(ErrorHandler& eh) : handler_(eh) {} template ::value)> FMT_CONSTEXPR auto operator()(T value) -> unsigned long long { if (is_negative(value)) handler_.on_error("negative width"); return static_cast(value); } template ::value)> FMT_CONSTEXPR auto operator()(T) -> unsigned long long { handler_.on_error("width is not integer"); return 0; } private: ErrorHandler& handler_; }; template class precision_checker { public: explicit FMT_CONSTEXPR precision_checker(ErrorHandler& eh) : handler_(eh) {} template ::value)> FMT_CONSTEXPR auto operator()(T value) -> unsigned long long { if (is_negative(value)) handler_.on_error("negative precision"); return static_cast(value); } template ::value)> FMT_CONSTEXPR auto operator()(T) -> unsigned long long { handler_.on_error("precision is not integer"); return 0; } private: ErrorHandler& handler_; }; template