base64enc/0000755000175000017500000000000012555615471013347 5ustar sebastiansebastianbase64enc/man/0000755000175000017500000000000012464500575014117 5ustar sebastiansebastianbase64enc/man/base64.Rd0000644000175000017500000000432612464500575015477 0ustar sebastiansebastian\name{base64} \alias{base64} \alias{base64encode} \alias{base64decode} \title{ Encode/decode data into/from base64 encoding } \description{ \code{base64encode} encodes data into base64 encoding. The source can be a file, binary connection or a raw vector. \code{base64decode} decodes a base64-encoded string into binary data. The source can be a string or a connection, the output is either a raw vector (\code{output=NULL}) or a binary connection. } \usage{ base64encode(what, linewidth, newline) base64decode(what, output = NULL, file) } \arguments{ \item{what}{data to be encoded/decoded. For \code{base64encode} it can be a raw vector, binary connection or file name. For \code{base64decode} it can be a string or a binary connection.} \item{linewidth}{if set, the output is split into lines with at most \code{linewidth} characters per line. Zero or \code{NA} denotes no limit and values 1 .. 3 are silently treated as 4 since that is the shortest valid line.} \item{newline}{only applicable if \code{linewidth} is set; if set (string), the result will be a single string with all lines joined using the \code{newline} string} \item{output}{if \code{NULL} then the output will be a raw vector with the decoded data, otherwise it must be either a filename (string) or a binary connection.} \item{file}{file name (string) for data to use as input instead of \code{what}. It is essentially just a shorthand for \code{base64decode(file(name))}. Only one of \code{what} and \code{file} can be specified.} } %\details{ %} \value{ \code{base64encode}: A character vector. If \code{linewidth > 0} and \code{newline} is not set then it will consist of as many elements as there are lines. Otherwise it is a single string.
\code{base64decode}: If \code{output = NULL} then a raw vector with the decoded content, otherwise the number of bytes written into the connection. } %\references{ %} \author{ Simon Urbanek } %\note{ %} %\seealso{ %} \examples{ base64encode(1:100) base64encode(1:100, 70) base64encode(1:100, 70, "\n") x <- charToRaw("the decoded content, otherwise the number of bytes") y <- base64decode(base64encode(x)) stopifnot(identical(x, y)) } \keyword{manip} base64enc/man/dataURI.Rd0000644000175000017500000000260012464500575015675 0ustar sebastiansebastian\name{dataURI} \alias{dataURI} \title{ Create a data URI string } \description{ \code{dataURI} creates a URI with the \code{data:} scheme by encoding the payload using either base64 or URI encoding. } \usage{ dataURI(data, mime = "", encoding = "base64", file) } \arguments{ \item{data}{raw vector, connection or character vector to use as payload. Character vectors of more than one element are collapsed using \code{"\n"} before encoding.} \item{mime}{MIME-type of the data (per standard "" is interpreted as "text/plain;charset=US-ASCII" without including it in the URI)} \item{encoding}{data encoding to use. Must be either \code{"base64"} or \code{NULL}} \item{file}{filename (string) to open as payload. \code{file} and \code{data} are mutually exclusive} } %\details{ %} \value{ string of the form \code{data:[mime][;base64],<data>} } \references{ \href{http://tools.ietf.org/html/rfc2397}{RFC 2397 The "data" URL scheme} } \author{ Simon Urbanek } %\note{ %} \examples{ dataURI(as.raw(1:10)) # default is base64 dataURI(as.raw(1:10), encoding=NULL) # URI if (require("png", quietly=TRUE)) { # let's say you have an image - e.g.
from dev.capture(TRUE) img <- matrix(1:16/16, 4) dataURI(writePNG(img), "image/png") # or straight from a file dataURI(file=system.file("img", "Rlogo.png", package="png"), mime="image/png") } } \keyword{manip} base64enc/man/checkUTF8.Rd0000644000175000017500000000230212464500575016127 0ustar sebastiansebastian\name{checkUTF8} \alias{checkUTF8} \title{ Check the validity of a byte stream to be interpreted as UTF8. } \description{ \code{checkUTF8} checks whether a given raw vector can be used as a valid string encoded in UTF8. } \usage{ checkUTF8(what, quiet = FALSE, charlen = FALSE, min.char = 1L) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{what}{raw vector with the payload} \item{quiet}{logical, if \code{TRUE} then the function will not fail but report success/failure via its result, otherwise failures are considered errors.} \item{charlen}{logical, if \code{TRUE} then the function returns the length of the longest byte sequence representing a character in the file.} \item{min.char}{integer, any bytes below this value are considered control characters and reported as errors. The default value of 1L guards against strings including NULs.} } \value{ If \code{charlen=FALSE}: \code{TRUE} on success, \code{FALSE} if the payload is invalid and \code{quiet=TRUE}. If \code{charlen=TRUE}: positive integer corresponding to the longest encoded sequence on success, negative integer on failure.
} \author{ Simon Urbanek } %\examples{ %} \keyword{manip} base64enc/NAMESPACE0000644000175000017500000000017512464500575014566 0ustar sebastiansebastianuseDynLib(base64enc, B64_encode, B64_decode, C_URIencode, utf8_check) export(base64encode, base64decode, dataURI, checkUTF8) base64enc/MD50000644000175000017500000000112312555615471013654 0ustar sebastiansebastian77354bac06985ef4a141add5b58ac7d5 *DESCRIPTION 4e8d397738d19055d1c3a16acf23973f *NAMESPACE 3e1dd6a53f755cce26ebc2cda8bede20 *NEWS 855863aba618475b3ebf6e925261ea7e *R/URI.R bdfb17d6280ba458758052e5747f1786 *R/UTF8.R 428fd562487eb269f08ae6b1ff1e9941 *R/base64.R 084e60bb1bdbd018b7b3ea592b408474 *R/dataURI.R df95cb6cc2776670af1d2afb0496d40b *man/base64.Rd e57327734fe665f44fdb0901ca89fb29 *man/checkUTF8.Rd 6aa8fe2aa56a68b82a383f8843bfb1ef *man/dataURI.Rd 3ab7e2d18aecc946b0b18d7285ba043b *src/base64.c 1d4011c2a805f12db1ff522f88cf1be4 *src/uriencode.c 0ae6e48f1fbd675ba91c8a591e84ae1e *src/utf8.c base64enc/DESCRIPTION0000644000175000017500000000100412555615471015050 0ustar sebastiansebastianPackage: base64enc Version: 0.1-3 Title: Tools for base64 encoding Author: Simon Urbanek Maintainer: Simon Urbanek Depends: R (>= 2.9.0) Enhances: png Description: This package provides tools for handling base64 encoding. It is more flexible than the orphaned base64 package. 
License: GPL-2 | GPL-3 URL: http://www.rforge.net/base64enc NeedsCompilation: yes Packaged: 2015-02-04 20:31:00 UTC; svnuser Repository: CRAN Date/Publication: 2015-07-28 08:03:37 base64enc/R/0000755000175000017500000000000012464500575013545 5ustar sebastiansebastianbase64enc/R/URI.R0000644000175000017500000000026312464500575014330 0ustar sebastiansebastianURIencode <- function(what, reserved=NULL) .Call(C_URIencode, what, if (is.logical(reserved)) { if (isTRUE(reserved == FALSE)) ";/?:@=&" else "" } else as.character(reserved)) base64enc/R/dataURI.R0000644000175000017500000000154712464500575015170 0ustar sebastiansebastiandataURI <- function(data, mime="", encoding="base64", file) { if (!is.null(encoding) && !isTRUE(encoding == "base64")) stop('encoding must be either NULL or "base64"') prefix <- paste("data:", as.character(mime)[1], if (!is.null(encoding)) ";base64", ",", sep ='') if (!missing(file)) { if (!missing(data)) stop("data and file are mutually exclusive") data <- con <- file(file, "rb") on.exit(close(con)) } if (inherits(data, "connection")) { if (isTRUE(summary(data)$text == "binary")) { l <- list() while (length(r <- readBin(data, raw(0), 1048576L))) l <- c(l, r) data <- unlist(l) } else data <- readLines(data) } if (!is.raw(data)) data <- paste(as.character(data), collapse='\n') paste(prefix, if (is.null(encoding)) .Call(C_URIencode, data, NULL) else .Call(B64_encode, data, 0L, NULL), sep='') } base64enc/R/base64.R0000644000175000017500000000345712464500575014765 0ustar sebastiansebastianbase64encode <- function(what, linewidth, newline) { linewidth <- if (missing(linewidth) || !is.numeric(linewidth) || length(linewidth) < 1L) 0L else as.integer(linewidth[1L]) if (is.na(linewidth)) linewidth <- 0L else if (linewidth > 0L && linewidth < 4L) linewidth <- 4L if (missing(newline)) newline <- NULL fi <- NULL if (is.character(what)) { what <- file(what, "rb") on.exit(close(what)) } if (inherits(what, "connection")) { slice <- 65535L ## default slice size 
- must be divisible by 3 if (linewidth > 0L) { ## we have to make sure the slices span whole lines if (linewidth %% 4L > 0) linewidth <- linewidth - linewidth %% 4L bw <- as.integer(linewidth / 4L) * 3L if (slice %% bw > 0L) slice <- slice + (bw - (slice %% bw)) } l <- list() while (length(r <- readBin(what, raw(0), slice))) l <- c(l, .Call(B64_encode, r, linewidth, newline)) if (linewidth > 0L && is.null(newline)) unlist(l) else paste(unlist(l), collapse = if (is.null(newline)) "" else newline) } else .Call(B64_encode, as.raw(what), linewidth, newline) }
## Decode base64 text into binary data.
##
##   what    character vector with the base64 payload, or a connection that
##           is read with readLines()
##   output  NULL (return the decoded raw vector), a file name (opened "wb")
##           or a connection the decoded bytes are written to
##   file    name of a file to read the payload from instead of `what`;
##           `what` and `file` are mutually exclusive
##
## Returns the decoded raw vector when output is NULL, otherwise (invisibly)
## the number of bytes written to the output connection.
base64decode <- function(what, output=NULL, file) {
  if (!missing(file) && !missing(what))
    stop("'what' and 'file' are mutually exclusive")
  if (!missing(file)) {
    what <- file(file, "r")
    on.exit(close(what))
  }
  if (is.character(output)) {
    output <- file(output, "wb")
    ## add = TRUE is essential: a plain on.exit() would replace the handler
    ## registered above and leak the input connection opened for `file`
    on.exit(close(output), add = TRUE)
  } else if (!inherits(output, "connection") && !is.null(output))
    stop("output must be a filename, connection or NULL")
  r <- if (inherits(what, "connection")) {
    ## FIXME: we may want to use chunking ...
    .Call(B64_decode, readLines(what, warn=FALSE))
  } else .Call(B64_decode, what)
  if (inherits(output, "connection")) {
    writeBin(r, output)
    invisible(length(r))
  } else r
}
base64enc/R/UTF8.R0000644000175000017500000000016712464500575014422 0ustar sebastiansebastiancheckUTF8 <- function(what, quiet=FALSE, charlen=FALSE, min.char=1L) .Call(utf8_check, what, quiet, charlen, min.char) base64enc/NEWS0000644000175000017500000000100212464500575014034 0ustar sebastiansebastian0.1-3 (under development) o add checkUTF8() which checks the validity of a raw vector for use as a UTF8 string 0.1-2 2014-06-26 o bugfix: encoding content of more than 65536 bytes without linebreaks produced padding characters between chunks because chunk size was not divisible by three. 0.1-1 2012-11-05 o fix a bug in base64decode where output is a file name o add base64decode(file=...)
as a (non-leaking) shorthand for base64decode(file(...)) 0.1-0 2012-09-07 o initial CRAN release base64enc/src/0000755000175000017500000000000012464500575014133 5ustar sebastiansebastianbase64enc/src/base64.c0000644000175000017500000001223012464500604015352 0ustar sebastiansebastian/* base64.c - encoding/decoding of base64 (C)Copyright 2011,12 Simon Urbanek Licensed under a choice of GPLv2 or GPLv3 */ /* int for now but it should be something like R_xlen_t -- must be signed, though! */ #define blen_t int /* -- base64 encode/decode -- */ static char *base64encode(const unsigned char *src, blen_t len, char *dst); static int base64decode(const char *src, void *dst, blen_t max_len); static const char *b64tab = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; #define SRC(i) ((i < len) ? src[i] : 0) /* guarded access to src[] */ /* dst must be at least (len + 2) / 3 * 4 + 1 bytes long and will be NUL terminated when done */ static char *base64encode(const unsigned char *src, blen_t len, char *dst) { while (len >= 3) { /* no need to worry about padding - faster */ *(dst++) = b64tab[src[0] >> 2]; *(dst++) = b64tab[((src[0] & 0x03) << 4) | ((src[1] & 0xf0) >> 4)]; *(dst++) = b64tab[((src[1] & 0x0f) << 2) | ((src[2] & 0xc0) >> 6)]; *(dst++) = b64tab[src[2] & 0x3f]; src += 3; len -= 3; } if (len > 0) { /* last chunk - may need padding and guarding against OOB */ *(dst++) = b64tab[src[0] >> 2]; *(dst++) = b64tab[((src[0] & 0x03) << 4) | ((SRC(1) & 0xf0) >> 4)]; *(dst++) = (len > 1) ? b64tab[((src[1] & 0x0f) << 2) | ((SRC(2) & 0xc0) >> 6)] : '='; *(dst++) = (len > 2) ? 
b64tab[src[2] & 0x3f] : '='; } *dst = 0; return dst; } #undef SRC static unsigned int val(const char **src) { while (1) { char c = **src; if (c) src[0]++; else return 0x10000; if (c >= 'A' && c <= 'Z') return c - 'A'; if (c >= 'a' && c <= 'z') return c - 'a' + 26; if (c >= '0' && c <= '9') return c - '0' + 52; if (c == '+') return 62; if (c == '/') return 63; if (c == '=') return 0x10000; /* we loop as to skip any blanks, newlines etc. */ } } /* returns the decoded length or -1 if max_len was not enough */ static int base64decode(const char *src, void *dst, blen_t max_len) { unsigned char *t = (unsigned char*) dst, *end = t + max_len; while (*src && t < end) { unsigned int v = val(&src); if (v > 64) break; *t = v << 2; v = val(&src); *t |= v >> 4; if (v < 64) { if (++t == end) return -1; *t = v << 4; v = val(&src); *t |= v >> 2; if (v < 64) { if (++t == end) return -1; *t = v << 6; v = val(&src); *t |= v & 0x3f; if (v < 64) t++; } } } return (blen_t) (t - (unsigned char*) dst); } static char stb[8192]; #include #include SEXP B64_encode(SEXP what, SEXP linewidth, SEXP newline) { const char *nl = 0; char *buf = stb; const unsigned char *src = (const unsigned char*) RAW(what); blen_t buflen = sizeof(stb), slice; int lwd = 0, len = LENGTH(what), step; if (len == 0) return allocVector(STRSXP, 0); if (TYPEOF(newline) == STRSXP && LENGTH(newline) > 0) nl = CHAR(STRING_ELT(newline, 0)); if (TYPEOF(linewidth) == INTSXP || TYPEOF(linewidth) == REALSXP) lwd = asInteger(linewidth); if (lwd <= 0) lwd = 0; else if (lwd < 4) lwd = 4; /* there must be at least 4 chars per line */ lwd -= lwd & 3; step = lwd / 4 * 3; /* make sure we get big enough buffer for what we need to do */ if (lwd == 0 || nl) { blen_t nll = nl ? 
strlen(nl) : 0; slice = (blen_t) len * 4 / 3 + 4; if (lwd && nll) slice += (slice / lwd + 1) * nll; if (slice > buflen) { buf = R_alloc(256, (slice >> 8) + 1); /* making sure we can use at least 73 bits where possible */ buflen = slice; } if (lwd == 0 || len <= step) { /* easy, jsut call encode and out */ base64encode(src, len, buf); return mkString(buf); } /* one string but with NLs */ { char *dst = buf; while (len) { int amt = (len > step) ? step : len; dst = base64encode(src, amt, dst); src += amt; len -= amt; if (len) { strcpy(dst, nl); dst += nll; } } return mkString(buf); } } else { /* lwd and no nl = vector result */ int i = 0; SEXP res = PROTECT(allocVector(STRSXP, len / step + 1)); slice = lwd + 1; if (slice > buflen) { buf = R_alloc(4, (slice >> 2) + 1); buflen = slice; } while(len) { int amt = (len > step) ? step : len; base64encode(src, amt, buf); src += amt; SET_STRING_ELT(res, i++, mkChar(buf)); len -= amt; } if (i < LENGTH(res)) SETLENGTH(res, i); UNPROTECT(1); return res; } } SEXP B64_decode(SEXP what) { /* we need to allocate enough space to decode. FIXME: For now, we assume it's full of payload; we will over-allocate if there is junk behind it */ blen_t tl = 0; SEXP res; int ns = LENGTH(what), i; unsigned char *dst; if (TYPEOF(what) != STRSXP) Rf_error("I can only decode base64 strings"); for (i = 0; i < ns; i++) tl += strlen(CHAR(STRING_ELT(what, i))); tl = (tl / 4) * 3 + 4; res = allocVector(RAWSXP, tl); dst = (unsigned char*) RAW(res); for (i = 0; i < ns; i++) { blen_t al = base64decode(CHAR(STRING_ELT(what, i)), dst, tl); if (al < 0) /* this should never happen as we allocated enough space ... 
*/ Rf_error("decoding error - insufficient buffer space"); tl -= al; dst += al; } SETLENGTH(res, dst - ((unsigned char*) RAW(res))); return res; } base64enc/src/utf8.c0000644000175000017500000000464412464500604015166 0ustar sebastiansebastian#include #include #define report(reason) { snprintf(cause, sizeof(cause), "INVALID byte 0x%02x at 0x%lx (%lu, line %lu): %s\n", (int) buf[i], i, i, line, reason); if (max_cl) *max_cl = maxcl; return 1; } static char cause[512]; static int utf8_check_(const unsigned char *buf, unsigned long len, int *max_cl, int min_char) { unsigned long i = 0, bp = len, line = 1; int maxcl = 1; while (i < bp) { if (min_char > 0 && buf[i] < min_char) report("disallowed control character"); if (buf[i] < 128) { if (buf[i] == '\n') line++; } else if (buf[i] < 192) { report("2+ byte of a sequence found in first position"); } else if (buf[i] < 194) { report("overlong encoding (<=127 encoded)"); } else if (buf[i] < 224) { /* 2-byte seq */ if (i + 1 < bp) { i++; if (buf[i] < 0x80 || buf[i] > 0xbf) { report("invalid second byte in 2-byte encoding"); } if (maxcl < 2) maxcl = 2; } else break; } else if (buf[i] < 240) { /* 3-byte seq */ if (i + 2 < bp) { i++; if (buf[i] < 0x80 || buf[i] > 0xbf) { report("invalid second byte in 3-byte encoding"); } i++; if (buf[i] < 0x80 || buf[i] > 0xbf) { report("invalid third byte in 3-byte encoding"); } if (maxcl < 3) maxcl = 3; } else break; } else if (buf[i] < 245) { /* 4-byte seq */ if (i + 3 < bp) { i++; if (buf[i] < 0x80 || buf[i] > 0xbf) { report("invalid second byte in 4-byte encoding"); } i++; if (buf[i] < 0x80 || buf[i] > 0xbf) { report("invalid third byte in 4-byte encoding"); } i++; if (buf[i] < 0x80 || buf[i] > 0xbf) { report("invalid fourth byte in 4-byte encoding"); } if (maxcl < 3) maxcl = 3; } else break; } else if (buf[i] < 254) { report("invalid start of a codepoint above 0x10FFFF"); } else { report("invalid start byte (FE/FF)"); } i++; } bp -= i; if (bp > 0) report("unterminated multi-byte sequence at 
the end of file"); return 0; } SEXP utf8_check(SEXP sWhat, SEXP sQuiet, SEXP sXLen, SEXP sMinChar) { if (TYPEOF(sWhat) != RAWSXP) Rf_error("invalid input"); { int maxcl = 0; int res = utf8_check_((const unsigned char*) RAW(sWhat), XLENGTH(sWhat), &maxcl, asInteger(sMinChar)); if (asInteger(sQuiet) == 0 && res) Rf_error("%s", cause); if (asInteger(sXLen) != 0) return ScalarInteger((res == 0) ? maxcl : (-maxcl)); return ScalarLogical((res == 0) ? TRUE : FALSE); } } base64enc/src/uriencode.c0000644000175000017500000000410312464500604016243 0ustar sebastiansebastian#include #include static const char *plain = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_.~"; static const char *hex = "0123456789ABCDEF"; /* flexible and fast for long strings. Since short string are, well, short, the overhead of building a table should play no role */ SEXP C_URIencode(SEXP what, SEXP resrv) { SEXP res; char tab[256]; const unsigned char *c = (const unsigned char*) plain; if (TYPEOF(what) != STRSXP && TYPEOF(what) != RAWSXP) Rf_error("input must be a raw or character vector"); memset(tab, 0, sizeof(tab)); while (*c) tab[*(c++)] = 1; if (TYPEOF(resrv) == STRSXP) { int n = LENGTH(resrv), i; for (i = 0; i < n; i++) { c = (const unsigned char*) CHAR(STRING_ELT(resrv, i)); while (*c) tab[*(c++)] = 1; } } if (TYPEOF(what) == RAWSXP) { int len = 0; const unsigned char *cend = (c = (const unsigned char*) RAW(what)) + LENGTH(what); char *enc, *ce; while (c < cend) len += tab[*(c++)] ? 
1 : 3; ce = enc = (char*) R_alloc(1, len + 1); c = (const unsigned char*) RAW(what); while (c < cend) if (tab[*c]) *(ce++) = *(c++); else { *(ce++) = '%'; *(ce++) = hex[*c >> 4]; *(ce++) = hex[*(c++) & 0x0F]; } *ce = 0; return mkString(enc); } else { int i, n = LENGTH(what), maxlen = 0; char *enc, *ce; res = allocVector(STRSXP, n); if (n == 0) return res; PROTECT(res); /* find the longest encoded string to allocate buffer */ for (i = 0; i < n; i++) { /* FIXME: we should tanslate to UTF8 */ int len = 0; c = (const unsigned char*) CHAR(STRING_ELT(what, i)); while (*c) len += tab[*(c++)] ? 1 : 3; if (len > maxlen) maxlen = len; } enc = (char*) R_alloc(1, maxlen + 1); for (i = 0; i < n; i++) { c = (const unsigned char*) CHAR(STRING_ELT(what, i)); ce = enc; while (*c) if (tab[*c]) *(ce++) = *(c++); else { *(ce++) = '%'; *(ce++) = hex[*c >> 4]; *(ce++) = hex[*(c++) & 0x0F]; } *ce = 0; SET_STRING_ELT(res, i, mkChar(enc)); } UNPROTECT(1); return res; } }