sourcetools/0000755000176200001440000000000013267774041012646 5ustar liggesuserssourcetools/inst/0000755000176200001440000000000013044455744013622 5ustar liggesuserssourcetools/inst/include/0000755000176200001440000000000013267770776015261 5ustar liggesuserssourcetools/inst/include/sourcetools/0000755000176200001440000000000013267774041017627 5ustar liggesuserssourcetools/inst/include/sourcetools/r/0000755000176200001440000000000013267770776020103 5ustar liggesuserssourcetools/inst/include/sourcetools/r/RUtils.h0000644000176200001440000000336713267770776021507 0ustar liggesusers#ifndef SOURCETOOLS_R_R_UTILS_H #define SOURCETOOLS_R_R_UTILS_H #include #include #include namespace sourcetools { namespace r { class Protect : noncopyable { public: Protect(): n_(0) {} ~Protect() { UNPROTECT(n_); } SEXP operator()(SEXP objectSEXP) { ++n_; return PROTECT(objectSEXP); } private: int n_; }; class RObjectFactory : noncopyable { public: RObjectFactory() : n_(0) { } template SEXP create(SEXPTYPE type, const std::vector& vector, F f) { ++n_; std::size_t n = vector.size(); SEXP resultSEXP = PROTECT(Rf_allocVector(type, n)); for (std::size_t i = 0; i < n; ++i) f(resultSEXP, i, vector[i]); return resultSEXP; } SEXP create(SEXPTYPE type, std::size_t n) { ++n_; return PROTECT(Rf_allocVector(type, n)); } ~RObjectFactory() { UNPROTECT(n_); } private: std::size_t n_; }; class ListBuilder : noncopyable { public: void add(const std::string& name, SEXP value) { names_.push_back(name); data_.push_back(protect_(value)); } operator SEXP() const { std::size_t n = data_.size(); SEXP resultSEXP = protect_(Rf_allocVector(VECSXP, n)); SEXP namesSEXP = protect_(Rf_allocVector(STRSXP, n)); for (std::size_t i = 0; i < n; ++i) { SET_VECTOR_ELT(resultSEXP, i, data_[i]); SET_STRING_ELT(namesSEXP, i, Rf_mkCharLen(names_[i].c_str(), names_[i].size())); } Rf_setAttrib(resultSEXP, R_NamesSymbol, namesSEXP); return resultSEXP; } private: std::vector names_; std::vector data_; mutable Protect protect_; }; } // namespace r } // namespace sourcetools #endif /* SOURCETOOLS_R_R_UTILS_H */ sourcetools/inst/include/sourcetools/r/RFunctions.h0000644000176200001440000000405313267770776022350 0ustar liggesusers#ifndef SOURCETOOLS_R_R_FUNCTIONS_H #define SOURCETOOLS_R_R_FUNCTIONS_H #include #include #include namespace sourcetools { namespace r { inline SEXP eval(const std::string& fn, SEXP envSEXP = NULL) { Protect protect; if (envSEXP == NULL) { SEXP strSEXP = protect(Rf_mkString("sourcetools")); envSEXP = R_FindNamespace(strSEXP); } SEXP callSEXP = protect(Rf_lang1(Rf_install(fn.c_str()))); SEXP resultSEXP = protect(Rf_eval(callSEXP, envSEXP)); return resultSEXP; } inline std::set objectsOnSearchPath() { std::set results; Protect protect; SEXP objectsSEXP; protect(objectsSEXP = eval("search_objects")); for (R_xlen_t i = 0; i < Rf_length(objectsSEXP); ++i) { SEXP strSEXP = VECTOR_ELT(objectsSEXP, i); for (R_xlen_t j = 0; j < Rf_length(strSEXP); ++j) { SEXP charSEXP = STRING_ELT(strSEXP, j); std::string element(CHAR(charSEXP), Rf_length(charSEXP)); results.insert(element); } } return results; } namespace util { inline void setNames(SEXP dataSEXP, const char** names, std::size_t n) { RObjectFactory factory; SEXP namesSEXP = factory.create(STRSXP, n); for (std::size_t i = 0; i < n; ++i) SET_STRING_ELT(namesSEXP, i, Rf_mkChar(names[i])); Rf_setAttrib(dataSEXP, R_NamesSymbol, namesSEXP); } inline void listToDataFrame(SEXP listSEXP, int n) { r::Protect protect; SEXP classSEXP = protect(Rf_mkString("data.frame")); Rf_setAttrib(listSEXP, 
R_ClassSymbol, classSEXP); SEXP rownamesSEXP = protect(Rf_allocVector(INTSXP, 2)); INTEGER(rownamesSEXP)[0] = NA_INTEGER; INTEGER(rownamesSEXP)[1] = -n; Rf_setAttrib(listSEXP, R_RowNamesSymbol, rownamesSEXP); } inline SEXP functionBody(SEXP fnSEXP) { SEXP bodyFunctionSEXP = Rf_findFun(Rf_install("body"), R_BaseNamespace); r::Protect protect; SEXP callSEXP = protect(Rf_lang2(bodyFunctionSEXP, fnSEXP)); return Rf_eval(callSEXP, R_BaseNamespace); } } // namespace util } // namespace r } // namespace sourcetools #endif /* SOURCETOOLS_R_R_FUNCTIONS_H */ sourcetools/inst/include/sourcetools/r/r.h0000644000176200001440000000050213071243763020472 0ustar liggesusers#ifndef SOURCETOOLS_R_R_H #define SOURCETOOLS_R_R_H #include #include #include #include #include #include #endif /* SOURCETOOLS_R_R_H */ sourcetools/inst/include/sourcetools/r/RCallRecurser.h0000644000176200001440000000253213071243763022746 0ustar liggesusers#ifndef SOURCETOOLS_R_R_CALL_RECURSER_H #define SOURCETOOLS_R_R_CALL_RECURSER_H #include #include #include #include namespace sourcetools { namespace r { class CallRecurser : noncopyable { public: class Operation { public: virtual void apply(SEXP dataSEXP) = 0; virtual ~Operation() {} }; explicit CallRecurser(SEXP dataSEXP) { if (Rf_isPrimitive(dataSEXP)) dataSEXP_ = R_NilValue; else if (Rf_isFunction(dataSEXP)) dataSEXP_ = r::util::functionBody(dataSEXP); else if (TYPEOF(dataSEXP) == LANGSXP) dataSEXP_ = dataSEXP; else dataSEXP_ = R_NilValue; } void add(Operation* pOperation) { operations_.push_back(pOperation); } void run() { runImpl(dataSEXP_); } void runImpl(SEXP dataSEXP) { for (std::vector::iterator it = operations_.begin(); it != operations_.end(); ++it) { (*it)->apply(dataSEXP); } if (TYPEOF(dataSEXP) == LANGSXP) { while (dataSEXP != R_NilValue) { runImpl(CAR(dataSEXP)); dataSEXP = CDR(dataSEXP); } } } private: SEXP dataSEXP_; std::vector operations_; }; } // namespace r } // namespace sourcetools #endif /* SOURCETOOLS_R_R_CALL_RECURSER_H */ sourcetools/inst/include/sourcetools/r/RNonStandardEvaluation.h0000644000176200001440000000545713267770776024654 0ustar liggesusers#ifndef SOURCETOOLS_R_R_NON_STANDARD_EVALUATION_H #define SOURCETOOLS_R_R_NON_STANDARD_EVALUATION_H #include #include #include #include namespace sourcetools { namespace r { namespace nse { namespace detail { inline std::set makeNsePrimitives() { std::set instance; instance.insert("quote"); instance.insert("substitute"); instance.insert("eval"); instance.insert("evalq"); instance.insert("lazy_dots"); return instance; } inline std::set& nsePrimitives() { static std::set instance = makeNsePrimitives(); return instance; } class PerformsNonStandardEvaluationOperation : public r::CallRecurser::Operation { public: PerformsNonStandardEvaluationOperation() : status_(false) { } virtual void apply(SEXP dataSEXP) { if (status_ || TYPEOF(dataSEXP) != LANGSXP) return; if ((status_ = checkCall(dataSEXP))) return; SEXP fnSEXP = CAR(dataSEXP); if (TYPEOF(fnSEXP) == SYMSXP) status_ = nsePrimitives().count(CHAR(PRINTNAME(fnSEXP))); else if (TYPEOF(fnSEXP) == STRSXP) status_ = nsePrimitives().count(CHAR(STRING_ELT(fnSEXP, 0))); } bool status() const { return status_; } private: bool checkCall(SEXP callSEXP) { std::size_t n = Rf_length(callSEXP); if (n == 0) return false; SEXP fnSEXP = CAR(callSEXP); if (fnSEXP == Rf_install("::") || fnSEXP == Rf_install(":::")) { SEXP lhsSEXP = CADR(callSEXP); SEXP rhsSEXP = CADDR(callSEXP); if (lhsSEXP == Rf_install("lazyeval") && rhsSEXP == Rf_install("lazy_dots")) return true; } return false; 
} private: bool status_; }; } // namespace detail class Database { public: bool check(SEXP dataSEXP) { if (contains(dataSEXP)) return get(dataSEXP); typedef detail::PerformsNonStandardEvaluationOperation Operation; scoped_ptr operation(new Operation); r::CallRecurser recurser(dataSEXP); recurser.add(operation); recurser.run(); set(dataSEXP, operation->status()); return operation->status(); } private: bool contains(SEXP dataSEXP) { return map_.count(address(dataSEXP)); } bool get(SEXP dataSEXP) { return map_[address(dataSEXP)]; } void set(SEXP dataSEXP, bool value) { map_[address(dataSEXP)] = value; } std::size_t address(SEXP dataSEXP) { return reinterpret_cast(dataSEXP); } std::map map_; }; inline Database& database() { static Database instance; return instance; } inline bool performsNonStandardEvaluation(SEXP fnSEXP) { return database().check(fnSEXP); } } // namespace nse } // namespace r } // namespace sourcetools #endif /* SOURCETOOLS_R_R_NON_STANDARD_EVALUATION_H */ sourcetools/inst/include/sourcetools/r/RHeaders.h0000644000176200001440000000024713071243763021734 0ustar liggesusers#ifndef SOURCETOOLS_R_R_HEADERS_H #define SOURCETOOLS_R_R_HEADERS_H #define R_NO_REMAP #include #include #endif /* SOURCETOOLS_R_R_HEADERS_H */ sourcetools/inst/include/sourcetools/r/RConverter.h0000644000176200001440000000160013267770776022342 0ustar liggesusers#ifndef SOURCETOOLS_R_R_CONVERTER_H #define SOURCETOOLS_R_R_CONVERTER_H #include #include #include #include namespace sourcetools { namespace r { inline SEXP Rf_mkChar(const std::string& data) { return Rf_mkCharLen(data.c_str(), data.size()); } inline SEXP Rf_mkString(const std::string& data) { Protect protect; SEXP resultSEXP = protect(Rf_allocVector(STRSXP, 1)); SET_STRING_ELT(resultSEXP, 0, Rf_mkChar(data)); return resultSEXP; } inline SEXP create(const std::vector& vector) { Protect protect; std::size_t n = vector.size(); SEXP resultSEXP = protect(Rf_allocVector(STRSXP, n)); for (std::size_t i = 0; i < n; ++i) SET_STRING_ELT(resultSEXP, i, Rf_mkChar(vector[i])); return resultSEXP; } } // namespace r } // namespace sourcetools #endif /* SOURCETOOLS_R_R_CONVERTER_H */ sourcetools/inst/include/sourcetools/tokenization/0000755000176200001440000000000013267770776022360 5ustar liggesuserssourcetools/inst/include/sourcetools/tokenization/Tokenizer.h0000644000176200001440000002740013267770776024506 0ustar liggesusers#ifndef SOURCETOOLS_TOKENIZATION_TOKENIZER_H #define SOURCETOOLS_TOKENIZATION_TOKENIZER_H #include #include #include #include #include #include namespace sourcetools { namespace tokenizer { class Tokenizer { private: typedef tokens::Token Token; typedef cursors::TextCursor TextCursor; typedef tokens::TokenType TokenType; private: // Tokenization ---- void consumeToken(TokenType type, std::size_t length, Token* pToken) { *pToken = Token(cursor_, type, length); cursor_.advance(length); } template void consumeUntil(char ch, TokenType type, Token* pToken) { TextCursor lookahead = cursor_; bool success = false; std::size_t distance = 0; while (lookahead != lookahead.end()) { lookahead.advance(); ++distance; if (SkipEscaped && lookahead.peek() == '\\') { lookahead.advance(); ++distance; continue; } if (lookahead.peek() == ch) { success = true; break; } } if (success) { consumeToken(type, distance + 1, pToken); } else { consumeToken( InvalidOnError ? 
tokens::INVALID : type, distance, pToken ); } } void consumeUserOperator(Token* pToken) { consumeUntil('%', tokens::OPERATOR_USER, pToken); } void consumeComment(Token* pToken) { consumeUntil('\n', tokens::COMMENT, pToken); } void consumeQuotedSymbol(Token* pToken) { consumeUntil('`', tokens::SYMBOL, pToken); } void consumeQString(Token* pToken) { consumeUntil('\'', tokens::STRING, pToken); } void consumeQQString(Token* pToken) { consumeUntil('"', tokens::STRING, pToken); } // NOTE: Don't tokenize '-' or '+' as part of number; instead // it's parsed as a unary operator. bool isStartOfNumber() { char ch = cursor_.peek(); if (utils::isDigit(ch)) return true; if (ch == '.') return utils::isDigit(cursor_.peek(1)); return false; } bool isStartOfSymbol() { return utils::isValidForStartOfRSymbol(cursor_.peek()); } bool consumeHexadecimalNumber(Token* pToken) { std::size_t distance = 0; // Detect the leading '0'. if (cursor_.peek(distance) != '0') return false; ++distance; // Detect a 'x' or 'X'. if (!(cursor_.peek(distance) == 'x' || cursor_.peek(distance) == 'X')) return false; ++distance; // Check and consume all alphanumeric characters. // The number is valid if the characters are valid // hexadecimal characters (0-9, a-f, A-F). The number // can also end with an 'i' (for an imaginary number) // or with an 'L' for an integer. if (!utils::isHexDigit(cursor_.peek(distance))) { consumeToken(tokens::INVALID, distance, pToken); return false; } bool success = true; char peek = cursor_.peek(distance); while (utils::isAlphaNumeric(peek) && peek != '\0') { // If we encounter an 'i' or an 'L', assume // that this ends the identifier. if (peek == 'i' || peek == 'L') { ++distance; break; } if (!utils::isHexDigit(peek)) success = false; ++distance; peek = cursor_.peek(distance); } consumeToken(success ? tokens::NUMBER : tokens::INVALID, distance, pToken); return true; } void consumeNumber(Token* pToken) { bool success = true; std::size_t distance = 0; // NOTE: A leading '-' or '+' is not consumed as part of // the number. // Try parsing this as a hexadecimal number first (e.g. '0xabc'). if (consumeHexadecimalNumber(pToken)) return; // Consume digits while (utils::isDigit(cursor_.peek(distance))) ++distance; // Consume a dot for decimals // Note: '.5' is a valid specification for a number // So is '100.'; ie, with a trailing decimal. if (cursor_.peek(distance) == '.') { ++distance; while (utils::isDigit(cursor_.peek(distance))) ++distance; } // Consume 'e', 'E' for exponential notation if (cursor_.peek(distance) == 'e' || cursor_.peek(distance) == 'E') { ++distance; // Consume a '-' or a '+' for a negative number if (cursor_.peek(distance) == '-' || cursor_.peek(distance) == '+') ++distance; // Parse another set of numbers following the E success = utils::isDigit(cursor_.peek(distance)); while (utils::isDigit(cursor_.peek(distance))) ++distance; // Consume '.' and following numbers. Note that this is // not really a valid number for R but it's better to tokenize // this is a single entity (and then report failure later) if (cursor_.peek(distance) == '.') { success = false; ++distance; while (utils::isDigit(cursor_.peek(distance))) ++distance; } } // Consume a final 'L' for integer literals if (cursor_.peek(distance) == 'L') ++distance; consumeToken(success ? 
tokens::NUMBER : tokens::INVALID, distance, pToken); } void consumeSymbol(Token* pToken) { std::size_t distance = 1; char ch = cursor_.peek(distance); while (utils::isValidForRSymbol(ch)) { ++distance; ch = cursor_.peek(distance); } const char* ptr = &*(cursor_.begin() + cursor_.offset()); consumeToken(tokens::symbolType(ptr, distance), distance, pToken); } public: Tokenizer(const char* code, std::size_t n) : cursor_(code, n) { } bool tokenize(Token* pToken) { if (cursor_ >= cursor_.end()) { *pToken = Token(tokens::END); return false; } char ch = cursor_.peek(); int n = 0; // Block-related tokens if (ch == '{') consumeToken(tokens::LBRACE, 1, pToken); else if (ch == '}') consumeToken(tokens::RBRACE, 1, pToken); else if (ch == '(') consumeToken(tokens::LPAREN, 1, pToken); else if (ch == ')') consumeToken(tokens::RPAREN, 1, pToken); else if (ch == '[') { if (cursor_.peek(1) == '[') { tokenStack_.push(tokens::LDBRACKET); consumeToken(tokens::LDBRACKET, 2, pToken); } else { tokenStack_.push(tokens::LBRACKET); consumeToken(tokens::LBRACKET, 1, pToken); } } else if (ch == ']') { if (tokenStack_.empty()) { consumeToken(tokens::INVALID, 1, pToken); } else if (tokenStack_.top() == tokens::LDBRACKET) { tokenStack_.pop(); if (cursor_.peek(1) == ']') consumeToken(tokens::RDBRACKET, 2, pToken); else consumeToken(tokens::INVALID, 1, pToken); } else { tokenStack_.pop(); consumeToken(tokens::RBRACKET, 1, pToken); } } // Operators else if (ch == '<') // <<-, <=, <-, < { char next = cursor_.peek(1); if (next == '-') // <- consumeToken(tokens::OPERATOR_ASSIGN_LEFT, 2, pToken); else if (next == '=') // <= consumeToken(tokens::OPERATOR_LESS_OR_EQUAL, 2, pToken); else if (next == '<' && cursor_.peek(2) == '-') consumeToken(tokens::OPERATOR_ASSIGN_LEFT_PARENT, 3, pToken); else consumeToken(tokens::OPERATOR_LESS, 1, pToken); } else if (ch == '>') // >=, > { if (cursor_.peek(1) == '=') consumeToken(tokens::OPERATOR_GREATER_OR_EQUAL, 2, pToken); else consumeToken(tokens::OPERATOR_GREATER, 1, pToken); } else if (ch == '=') // '==', '=' { if (cursor_.peek(1) == '=') consumeToken(tokens::OPERATOR_EQUAL, 2, pToken); else consumeToken(tokens::OPERATOR_ASSIGN_LEFT_EQUALS, 1, pToken); } else if (ch == '|') // '||', '|' { if (cursor_.peek(1) == '|') consumeToken(tokens::OPERATOR_OR_SCALAR, 2, pToken); else consumeToken(tokens::OPERATOR_OR_VECTOR, 1, pToken); } else if (ch == '&') // '&&', '&' { if (cursor_.peek(1) == '&') consumeToken(tokens::OPERATOR_AND_SCALAR, 2, pToken); else consumeToken(tokens::OPERATOR_AND_VECTOR, 1, pToken); } else if (ch == '*') // **, * { if (cursor_.peek(1) == '*') consumeToken(tokens::OPERATOR_EXPONENTATION_STARS, 2, pToken); else consumeToken(tokens::OPERATOR_MULTIPLY, 1, pToken); } else if (ch == ':') // ':::', '::', ':=', ':' { if (cursor_.peek(1) == ':') { if (cursor_.peek(2) == ':') consumeToken(tokens::OPERATOR_NAMESPACE_ALL, 3, pToken); else consumeToken(tokens::OPERATOR_NAMESPACE_EXPORTS, 2, pToken); } else if (cursor_.peek(1) == '=') consumeToken(tokens::OPERATOR_ASSIGN_LEFT_COLON, 2, pToken); else consumeToken(tokens::OPERATOR_SEQUENCE, 1, pToken); } else if (ch == '!') { if (cursor_.peek(1) == '=') consumeToken(tokens::OPERATOR_NOT_EQUAL, 2, pToken); else consumeToken(tokens::OPERATOR_NEGATION, 1, pToken); } else if (ch == '-') // '->>', '->', '-' { if (cursor_.peek(1) == '>') { if (cursor_.peek(2) == '>') consumeToken(tokens::OPERATOR_ASSIGN_RIGHT_PARENT, 3, pToken); else consumeToken(tokens::OPERATOR_ASSIGN_RIGHT, 2, pToken); } else consumeToken(tokens::OPERATOR_MINUS, 1, pToken); 
} else if (ch == '+') consumeToken(tokens::OPERATOR_PLUS, 1, pToken); else if (ch == '~') consumeToken(tokens::OPERATOR_FORMULA, 1, pToken); else if (ch == '?') consumeToken(tokens::OPERATOR_HELP, 1, pToken); else if (ch == '/') consumeToken(tokens::OPERATOR_DIVIDE, 1, pToken); else if (ch == '@') consumeToken(tokens::OPERATOR_AT, 1, pToken); else if (ch == '$') consumeToken(tokens::OPERATOR_DOLLAR, 1, pToken); else if (ch == '^') consumeToken(tokens::OPERATOR_HAT, 1, pToken); // User operators else if (ch == '%') consumeUserOperator(pToken); // Punctuation-related tokens else if (ch == ',') consumeToken(tokens::COMMA, 1, pToken); else if (ch == ';') consumeToken(tokens::SEMI, 1, pToken); // Whitespace else if (utils::countWhitespaceBytes(cursor_, &n)) consumeToken(tokens::WHITESPACE, n, pToken); // Strings and symbols else if (ch == '\'') consumeQString(pToken); else if (ch == '"') consumeQQString(pToken); else if (ch == '`') consumeQuotedSymbol(pToken); // Comments else if (ch == '#') consumeComment(pToken); // Number else if (isStartOfNumber()) consumeNumber(pToken); // Symbol else if (isStartOfSymbol()) consumeSymbol(pToken); // Nothing matched -- error else consumeToken(tokens::INVALID, 1, pToken); return true; } Token peek(std::size_t lookahead = 1) { Tokenizer clone(*this); Token result(tokens::END); for (std::size_t i = 0; i < lookahead; ++i) { if (!clone.tokenize(&result)) { break; } } return result; } private: TextCursor cursor_; std::stack > tokenStack_; }; } // namespace tokenizer inline std::vector tokenize(const char* code, std::size_t n) { typedef tokenizer::Tokenizer Tokenizer; typedef tokens::Token Token; std::vector tokens; if (n == 0) return tokens; Token token; Tokenizer tokenizer(code, n); while (tokenizer.tokenize(&token)) tokens.push_back(token); return tokens; } inline std::vector tokenize(const std::string& code) { return tokenize(code.data(), code.size()); } } // namespace sourcetools #endif /* SOURCETOOLS_TOKENIZATION_TOKENIZER_H */ sourcetools/inst/include/sourcetools/tokenization/Token.h0000644000176200001440000002631313267770776023616 0ustar liggesusers#ifndef SOURCETOOLS_TOKENIZATION_TOKEN_H #define SOURCETOOLS_TOKENIZATION_TOKEN_H #include #include #include #include #include #include #include #include #include #include namespace sourcetools { namespace tokens { class Token { private: typedef cursors::TextCursor TextCursor; typedef collections::Position Position; public: Token() : begin_(NULL), end_(NULL), offset_(0), type_(INVALID) { } explicit Token(TokenType type) : begin_(NULL), end_(NULL), offset_(0), type_(type) { } Token(const Position& position) : begin_(NULL), end_(NULL), offset_(0), position_(position), type_(INVALID) { } Token(const TextCursor& cursor, TokenType type, std::size_t length) : begin_(cursor.begin() + cursor.offset()), end_(cursor.begin() + cursor.offset() + length), offset_(cursor.offset()), position_(cursor.position()), type_(type) { } const char* begin() const { return begin_; } const char* end() const { return end_; } std::size_t offset() const { return offset_; } std::size_t size() const { return end_ - begin_; } std::string contents() const { return std::string(begin_, end_); } bool contentsEqual(const char* string) { return std::strcmp(begin_, string); } bool contentsEqual(const std::string& string) const { if (string.size() != size()) return false; return std::memcmp(begin_, string.c_str(), size()) == 0; } const Position& position() const { return position_; } std::size_t row() const { return position_.row; } std::size_t 
column() const { return position_.column; } TokenType type() const { return type_; } bool isType(TokenType type) const { return type_ == type; } private: const char* begin_; const char* end_; std::size_t offset_; Position position_; TokenType type_; }; inline bool isBracket(const Token& token) { return SOURCE_TOOLS_CHECK_MASK(token.type(), SOURCE_TOOLS_BRACKET_MASK); } inline bool isLeftBracket(const Token& token) { return SOURCE_TOOLS_CHECK_MASK(token.type(), SOURCE_TOOLS_BRACKET_LEFT_MASK); } inline bool isRightBracket(const Token& token) { return SOURCE_TOOLS_CHECK_MASK(token.type(), SOURCE_TOOLS_BRACKET_RIGHT_MASK); } inline bool isComplement(TokenType lhs, TokenType rhs) { static const TokenType mask = SOURCE_TOOLS_BRACKET_BIT | SOURCE_TOOLS_BRACKET_LEFT_BIT | SOURCE_TOOLS_BRACKET_RIGHT_BIT; if (SOURCE_TOOLS_CHECK_MASK((lhs | rhs), mask)) return SOURCE_TOOLS_LOWER_BITS(lhs, 4) == SOURCE_TOOLS_LOWER_BITS(rhs, 4); return false; } inline TokenType complement(TokenType type) { static const TokenType mask = SOURCE_TOOLS_BRACKET_LEFT_BIT | SOURCE_TOOLS_BRACKET_RIGHT_BIT; return type ^ mask; } inline bool isKeyword(const Token& token) { return SOURCE_TOOLS_CHECK_MASK(token.type(), SOURCE_TOOLS_KEYWORD_MASK); } inline bool isControlFlowKeyword(const Token& token) { return SOURCE_TOOLS_CHECK_MASK(token.type(), SOURCE_TOOLS_KEYWORD_CONTROL_FLOW_MASK); } inline bool isOperator(const Token& token) { return SOURCE_TOOLS_CHECK_MASK(token.type(), SOURCE_TOOLS_OPERATOR_MASK); } inline bool isUnaryOperator(const Token& token) { return SOURCE_TOOLS_CHECK_MASK(token.type(), SOURCE_TOOLS_OPERATOR_UNARY_MASK); } inline bool isNonUnaryOperator(const Token& token) { return isOperator(token) && !isUnaryOperator(token); } inline bool isComparisonOperator(const Token& token) { switch (token.type()) { case OPERATOR_AND_SCALAR: case OPERATOR_AND_VECTOR: case OPERATOR_OR_SCALAR: case OPERATOR_OR_VECTOR: case OPERATOR_EQUAL: case OPERATOR_NOT_EQUAL: case OPERATOR_LESS: case OPERATOR_LESS_OR_EQUAL: case OPERATOR_GREATER: case OPERATOR_GREATER_OR_EQUAL: return true; default: return false; } } inline bool isWhitespace(const Token& token) { return token.type() == WHITESPACE; } inline bool isComment(const Token& token) { return token.type() == COMMENT; } inline bool isSymbol(const Token& token) { return token.type() == SYMBOL; } inline bool isEnd(const Token& token) { return token.type() == END; } inline bool isString(const Token& token) { return token.type() == STRING; } inline bool isSymbolic(const Token& token) { static const TokenType mask = SYMBOL | NUMBER | STRING; return (token.type() & mask) != 0; } inline bool isNumeric(const Token& token) { return (token.type() & NUMBER) != 0; } inline bool isCallOperator(const Token& token) { return token.type() == LPAREN || token.type() == LBRACKET || token.type() == LDBRACKET; } inline bool isAssignmentOperator(const Token& token) { switch (token.type()) { case OPERATOR_ASSIGN_LEFT: case OPERATOR_ASSIGN_LEFT_COLON: case OPERATOR_ASSIGN_LEFT_EQUALS: case OPERATOR_ASSIGN_LEFT_PARENT: case OPERATOR_ASSIGN_RIGHT: case OPERATOR_ASSIGN_RIGHT_PARENT: return true; default: return false; } } namespace detail { inline bool isHexDigit(char c) { if (c >= '0' && c <= '9') return true; else if (c >= 'a' && c <= 'f') return true; else if (c >= 'A' && c <= 'F') return true; return false; } inline int hexValue(char c) { if (c >= '0' && c <= '9') return c - '0'; else if (c >= 'a' && c <= 'f') return c - 'a' + 10; else if (c >= 'A' && c <= 'F') return c - 'A' + 10; return 0; } // Parses an 
octal escape sequence, e.g. '\012'. inline bool parseOctal(const char*& it, char*& output) { // Check for opening escape if (*it != '\\') return false; // Check for number following char lookahead = *(it + 1); if (lookahead < '0' || lookahead > '7') return false; ++it; // Begin parsing. Consume up to three numbers. unsigned char result = 0; const char* end = it + 3; for (; it != end; ++it) { char ch = *it; if ('0' <= ch && ch <= '7') result = 8 * result + ch - '0'; else break; } // Assign result, and return. *output++ = result; return true; } // Parse a hex escape sequence, e.g. '\xFF'. inline bool parseHex(const char*& it, char*& output) { // Check for opening escape. if (*it != '\\') return false; if (*(it + 1) != 'x') return false; if (!isHexDigit(*(it + 2))) return false; // Begin parsing. it += 2; unsigned char value = 0; const char* end = it + 2; for (; it != end; ++it) { int result = hexValue(*it); if (result == 0) break; value = 16 * value + result; } *output++ = value; return true; } // Parse a unicode escape sequence. inline bool parseUnicode(const char*& it, char*& output) { if (*it != '\\') return false; char lookahead = *(it + 1); int size; if (lookahead == 'u') size = 4; else if (lookahead == 'U') size = 8; else return false; // Clone the input iterator (only set it on success) const char* clone = it; clone += 2; // Check for e.g. '\u{...}' // ^ bool delimited = *clone == '{'; clone += delimited; // Check for a hex digit. if (!isHexDigit(*clone)) return false; // Begin parsing hex digits wchar_t value = 0; const char* end = clone + size; for (; clone != end; ++clone) { if (!isHexDigit(*clone)) break; int hex = hexValue(*clone); value = 16 * value + hex; } // Eat a closing '}' if we had a starting '{'. if (delimited) { if (*clone != '}') return false; ++clone; } std::mbstate_t state; std::memset(&state, 0, sizeof(state)); std::size_t bytes = std::wcrtomb(output, value, &state); if (bytes == static_cast(-1)) return false; // Update iterator state it = clone; output += bytes; return true; } } // namespace detail inline std::string stringValue(const char* begin, const char* end) { if (begin == end) return std::string(); std::size_t n = end - begin; scoped_array buffer(new char[n + 1]); const char* it = begin; char* output = buffer; while (it < end) { if (*it == '\\') { if (detail::parseOctal(it, output) || detail::parseHex(it, output) || detail::parseUnicode(it, output)) { continue; } // Handle the rest ++it; switch (*it) { case 'a': *output++ = '\a'; break; case 'b': *output++ = '\b'; break; case 'f': *output++ = '\f'; break; case 'n': *output++ = '\n'; break; case 'r': *output++ = '\r'; break; case 't': *output++ = '\t'; break; case 'v': *output++ = '\v'; break; case '\\': *output++ = '\\'; break; default: *output++ = *it; break; } ++it; } else { *output++ = *it++; } } // Ensure null termination, just in case *output++ = '\0'; // Construct the result string and return std::string result(buffer, output - buffer); return result; } inline std::string stringValue(const Token& token) { switch (token.type()) { case STRING: return stringValue(token.begin() + 1, token.end() - 1); case SYMBOL: if (*token.begin() == '`') return stringValue(token.begin() + 1, token.end() - 1); default: return stringValue(token.begin(), token.end()); } } } // namespace tokens inline std::string toString(tokens::TokenType type) { using namespace tokens; if (type == INVALID) return "invalid"; else if (type == END) return "end"; else if (type == EMPTY) return "empty"; else if (type == MISSING) return 
"missing"; else if (type == SEMI) return "semi"; else if (type == COMMA) return "comma"; else if (type == SYMBOL) return "symbol"; else if (type == COMMENT) return "comment"; else if (type == WHITESPACE) return "whitespace"; else if (type == STRING) return "string"; else if (type == NUMBER) return "number"; else if (SOURCE_TOOLS_CHECK_MASK(type, SOURCE_TOOLS_BRACKET_MASK)) return "bracket"; else if (SOURCE_TOOLS_CHECK_MASK(type, SOURCE_TOOLS_KEYWORD_MASK)) return "keyword"; else if (SOURCE_TOOLS_CHECK_MASK(type, SOURCE_TOOLS_OPERATOR_MASK)) return "operator"; return "unknown"; } inline std::string toString(const tokens::Token& token) { std::string contents; if (token.isType(tokens::END)) contents = ""; else if (token.isType(tokens::EMPTY)) contents = ""; else if (token.isType(tokens::MISSING)) contents = ""; else contents = token.contents(); static const int N = 1024; if (contents.size() > N / 2) contents = contents.substr(0, N / 2); char buff[N]; std::sprintf(buff, "[%4lu:%4lu]: %s", static_cast(token.row()), static_cast(token.column()), contents.c_str()); return buff; } inline std::ostream& operator<<(std::ostream& os, const tokens::Token& token) { return os << toString(token); } inline std::ostream& operator<<(std::ostream& os, const std::vector& tokens) { for (std::vector::const_iterator it = tokens.begin(); it != tokens.end(); ++it) { os << *it << std::endl; } return os; } } // namespace sourcetools #endif /* SOURCETOOLS_TOKENIZATION_TOKEN_H */ sourcetools/inst/include/sourcetools/tokenization/tokenization.h0000644000176200001440000000044613045713105025224 0ustar liggesusers#ifndef SOURCETOOLS_TOKENIZATION_TOKENIZATION_H #define SOURCETOOLS_TOKENIZATION_TOKENIZATION_H #include #include #include #endif /* SOURCETOOLS_TOKENIZATION_TOKENIZATION_H */ sourcetools/inst/include/sourcetools/tokenization/Registration.h0000644000176200001440000002221113267770776025201 0ustar liggesusers#ifndef SOURCETOOLS_TOKENIZATION_REGISTRATION_H #define SOURCETOOLS_TOKENIZATION_REGISTRATION_H #include #include #include namespace sourcetools { namespace tokens { typedef unsigned int TokenType; // Simple, non-nestable types. 
#define SOURCE_TOOLS_REGISTER_SIMPLE_TYPE(__NAME__, __TYPE__) \ static const TokenType __NAME__ = __TYPE__ SOURCE_TOOLS_REGISTER_SIMPLE_TYPE(INVALID, (1 << 31)); SOURCE_TOOLS_REGISTER_SIMPLE_TYPE(END, (1 << 30)); SOURCE_TOOLS_REGISTER_SIMPLE_TYPE(EMPTY, (1 << 29)); SOURCE_TOOLS_REGISTER_SIMPLE_TYPE(MISSING, (1 << 28)); SOURCE_TOOLS_REGISTER_SIMPLE_TYPE(ROOT, (1 << 27)); SOURCE_TOOLS_REGISTER_SIMPLE_TYPE(SEMI, (1 << 26)); SOURCE_TOOLS_REGISTER_SIMPLE_TYPE(COMMA, (1 << 25)); SOURCE_TOOLS_REGISTER_SIMPLE_TYPE(SYMBOL, (1 << 24)); SOURCE_TOOLS_REGISTER_SIMPLE_TYPE(COMMENT, (1 << 23)); SOURCE_TOOLS_REGISTER_SIMPLE_TYPE(WHITESPACE, (1 << 22)); SOURCE_TOOLS_REGISTER_SIMPLE_TYPE(STRING, (1 << 21)); SOURCE_TOOLS_REGISTER_SIMPLE_TYPE(NUMBER, (1 << 20)); /* Brackets */ #define SOURCE_TOOLS_BRACKET_BIT (1 << 19) #define SOURCE_TOOLS_BRACKET_RIGHT_BIT (1 << 5) #define SOURCE_TOOLS_BRACKET_LEFT_BIT (1 << 4) #define SOURCE_TOOLS_BRACKET_MASK SOURCE_TOOLS_BRACKET_BIT #define SOURCE_TOOLS_BRACKET_LEFT_MASK (SOURCE_TOOLS_BRACKET_BIT | SOURCE_TOOLS_BRACKET_LEFT_BIT) #define SOURCE_TOOLS_BRACKET_RIGHT_MASK (SOURCE_TOOLS_BRACKET_BIT | SOURCE_TOOLS_BRACKET_RIGHT_BIT) #define SOURCE_TOOLS_REGISTER_BRACKET(__NAME__, __SIDE__, __INDEX__) \ static const TokenType __NAME__ = \ SOURCE_TOOLS_BRACKET_BIT | __SIDE__ | __INDEX__ SOURCE_TOOLS_REGISTER_BRACKET(LPAREN, SOURCE_TOOLS_BRACKET_LEFT_BIT, (1 << 0)); SOURCE_TOOLS_REGISTER_BRACKET(LBRACE, SOURCE_TOOLS_BRACKET_LEFT_BIT, (1 << 1)); SOURCE_TOOLS_REGISTER_BRACKET(LBRACKET, SOURCE_TOOLS_BRACKET_LEFT_BIT, (1 << 2)); SOURCE_TOOLS_REGISTER_BRACKET(LDBRACKET, SOURCE_TOOLS_BRACKET_LEFT_BIT, (1 << 3)); SOURCE_TOOLS_REGISTER_BRACKET(RPAREN, SOURCE_TOOLS_BRACKET_RIGHT_BIT, (1 << 0)); SOURCE_TOOLS_REGISTER_BRACKET(RBRACE, SOURCE_TOOLS_BRACKET_RIGHT_BIT, (1 << 1)); SOURCE_TOOLS_REGISTER_BRACKET(RBRACKET, SOURCE_TOOLS_BRACKET_RIGHT_BIT, (1 << 2)); SOURCE_TOOLS_REGISTER_BRACKET(RDBRACKET, SOURCE_TOOLS_BRACKET_RIGHT_BIT, (1 << 3)); /* Operators */ #define SOURCE_TOOLS_OPERATOR_BIT (1 << 18) #define SOURCE_TOOLS_OPERATOR_UNARY_BIT (1 << 6) #define SOURCE_TOOLS_OPERATOR_MASK (SOURCE_TOOLS_OPERATOR_BIT) #define SOURCE_TOOLS_OPERATOR_UNARY_MASK (SOURCE_TOOLS_OPERATOR_MASK | SOURCE_TOOLS_OPERATOR_UNARY_BIT) #define SOURCE_TOOLS_REGISTER_OPERATOR(__NAME__, __STRING__, __MASKS__) \ \ static const TokenType OPERATOR_ ## __NAME__ = \ SOURCE_TOOLS_OPERATOR_BIT | __MASKS__; \ \ static const char* const \ OPERATOR_ ## __NAME__ ## _STRING = __STRING__ #define SOURCE_TOOLS_REGISTER_UNARY_OPERATOR(__NAME__, __STRING__, __INDEX__) \ SOURCE_TOOLS_REGISTER_OPERATOR(__NAME__, __STRING__, SOURCE_TOOLS_OPERATOR_UNARY_BIT | __INDEX__) // See ?"Syntax" for details on R's operators. // Note: All operators registered work in a binary context, but only // some will work as unary operators. (Occurring to the left of the token). // // In other words, -1 is parsed as `-`(1). // // Note that although brackets are operators we tokenize them separately, // since we need to later check for their paired complement. 
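// As an illustration of the bracket pairing noted above: LPAREN and RPAREN
// share the same low index bit and differ only in the left/right bits, so
// tokens::complement(LPAREN) == RPAREN (see complement() in Token.h), and
// likewise for braces, brackets, and double brackets.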
SOURCE_TOOLS_REGISTER_UNARY_OPERATOR(PLUS, "+", 0); SOURCE_TOOLS_REGISTER_UNARY_OPERATOR(MINUS, "-", 1); SOURCE_TOOLS_REGISTER_UNARY_OPERATOR(HELP, "?", 2); SOURCE_TOOLS_REGISTER_UNARY_OPERATOR(NEGATION, "!", 3); SOURCE_TOOLS_REGISTER_UNARY_OPERATOR(FORMULA, "~", 4); SOURCE_TOOLS_REGISTER_OPERATOR(NAMESPACE_EXPORTS, "::", 5); SOURCE_TOOLS_REGISTER_OPERATOR(NAMESPACE_ALL, ":::", 6); SOURCE_TOOLS_REGISTER_OPERATOR(DOLLAR, "$", 7); SOURCE_TOOLS_REGISTER_OPERATOR(AT, "@", 8); SOURCE_TOOLS_REGISTER_OPERATOR(HAT, "^", 9); SOURCE_TOOLS_REGISTER_OPERATOR(EXPONENTATION_STARS, "**", 10); SOURCE_TOOLS_REGISTER_OPERATOR(SEQUENCE, ":", 11); SOURCE_TOOLS_REGISTER_OPERATOR(MULTIPLY, "*", 12); SOURCE_TOOLS_REGISTER_OPERATOR(DIVIDE, "/", 13); SOURCE_TOOLS_REGISTER_OPERATOR(LESS, "<", 14); SOURCE_TOOLS_REGISTER_OPERATOR(LESS_OR_EQUAL, "<=", 15); SOURCE_TOOLS_REGISTER_OPERATOR(GREATER, ">", 16); SOURCE_TOOLS_REGISTER_OPERATOR(GREATER_OR_EQUAL, ">=", 17); SOURCE_TOOLS_REGISTER_OPERATOR(EQUAL, "==", 18); SOURCE_TOOLS_REGISTER_OPERATOR(NOT_EQUAL, "!=", 19); SOURCE_TOOLS_REGISTER_OPERATOR(AND_VECTOR, "&", 20); SOURCE_TOOLS_REGISTER_OPERATOR(AND_SCALAR, "&&", 21); SOURCE_TOOLS_REGISTER_OPERATOR(OR_VECTOR, "|", 22); SOURCE_TOOLS_REGISTER_OPERATOR(OR_SCALAR, "||", 23); SOURCE_TOOLS_REGISTER_OPERATOR(ASSIGN_LEFT, "<-", 24); SOURCE_TOOLS_REGISTER_OPERATOR(ASSIGN_LEFT_PARENT, "<<-", 25); SOURCE_TOOLS_REGISTER_OPERATOR(ASSIGN_RIGHT, "->", 26); SOURCE_TOOLS_REGISTER_OPERATOR(ASSIGN_RIGHT_PARENT, "->>", 27); SOURCE_TOOLS_REGISTER_OPERATOR(ASSIGN_LEFT_EQUALS, "=", 28); SOURCE_TOOLS_REGISTER_OPERATOR(ASSIGN_LEFT_COLON, ":=", 29); SOURCE_TOOLS_REGISTER_OPERATOR(USER, "%%", 30); /* Keywords and symbols */ #define SOURCE_TOOLS_KEYWORD_BIT (1 << 17) #define SOURCE_TOOLS_KEYWORD_CONTROL_FLOW_BIT (1 << 7) #define SOURCE_TOOLS_KEYWORD_MASK SOURCE_TOOLS_KEYWORD_BIT #define SOURCE_TOOLS_KEYWORD_CONTROL_FLOW_MASK (SOURCE_TOOLS_KEYWORD_MASK | SOURCE_TOOLS_KEYWORD_CONTROL_FLOW_BIT) #define SOURCE_TOOLS_REGISTER_KEYWORD(__NAME__, __MASKS__) \ static const TokenType KEYWORD_ ## __NAME__ = \ __MASKS__ | SOURCE_TOOLS_KEYWORD_MASK #define SOURCE_TOOLS_REGISTER_CONTROL_FLOW_KEYWORD(__NAME__, __MASKS__) \ SOURCE_TOOLS_REGISTER_KEYWORD(__NAME__, __MASKS__ | SOURCE_TOOLS_KEYWORD_CONTROL_FLOW_MASK) // See '?Reserved' for a list of reversed R symbols. SOURCE_TOOLS_REGISTER_CONTROL_FLOW_KEYWORD(IF, 1); SOURCE_TOOLS_REGISTER_CONTROL_FLOW_KEYWORD(FOR, 2); SOURCE_TOOLS_REGISTER_CONTROL_FLOW_KEYWORD(WHILE, 3); SOURCE_TOOLS_REGISTER_CONTROL_FLOW_KEYWORD(REPEAT, 4); SOURCE_TOOLS_REGISTER_CONTROL_FLOW_KEYWORD(FUNCTION, 5); SOURCE_TOOLS_REGISTER_KEYWORD(ELSE, 6); SOURCE_TOOLS_REGISTER_KEYWORD(IN, 7); SOURCE_TOOLS_REGISTER_KEYWORD(NEXT, 8); SOURCE_TOOLS_REGISTER_KEYWORD(BREAK, 9); SOURCE_TOOLS_REGISTER_KEYWORD(TRUE, 10); SOURCE_TOOLS_REGISTER_KEYWORD(FALSE, 11); SOURCE_TOOLS_REGISTER_KEYWORD(NULL, 12); SOURCE_TOOLS_REGISTER_KEYWORD(Inf, 13); SOURCE_TOOLS_REGISTER_KEYWORD(NaN, 14); SOURCE_TOOLS_REGISTER_KEYWORD(NA, 15); SOURCE_TOOLS_REGISTER_KEYWORD(NA_integer_, 16); SOURCE_TOOLS_REGISTER_KEYWORD(NA_real_, 17); SOURCE_TOOLS_REGISTER_KEYWORD(NA_complex_, 18); SOURCE_TOOLS_REGISTER_KEYWORD(NA_character_, 19); inline TokenType symbolType(const char* string, std::size_t n) { // TODO: Is this insanity really an optimization or am I just silly? 
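// The lookup below dispatches on the symbol's length first: reserved words
// range from 2 characters ("if", "NA") up to 13 ("NA_character_"), so only
// the few keywords of the matching length need a memcmp, and any symbol whose
// length falls outside that range is immediately classified as a plain SYMBOL.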
if (n < 2 || n > 13) { return SYMBOL; } else if (n == 2) { if (!std::memcmp(string, "in", n)) return KEYWORD_IN; if (!std::memcmp(string, "if", n)) return KEYWORD_IF; if (!std::memcmp(string, "NA", n)) return KEYWORD_NA; } else if (n == 3) { if (!std::memcmp(string, "for", n)) return KEYWORD_FOR; if (!std::memcmp(string, "Inf", n)) return KEYWORD_Inf; if (!std::memcmp(string, "NaN", n)) return KEYWORD_NaN; } else if (n == 4) { if (!std::memcmp(string, "else", n)) return KEYWORD_ELSE; if (!std::memcmp(string, "next", n)) return KEYWORD_NEXT; if (!std::memcmp(string, "TRUE", n)) return KEYWORD_TRUE; if (!std::memcmp(string, "NULL", n)) return KEYWORD_NULL; } else if (n == 5) { if (!std::memcmp(string, "while", n)) return KEYWORD_WHILE; if (!std::memcmp(string, "break", n)) return KEYWORD_BREAK; if (!std::memcmp(string, "FALSE", n)) return KEYWORD_FALSE; } else if (n == 6) { if (!std::memcmp(string, "repeat", n)) return KEYWORD_REPEAT; } else if (n == 8) { if (!std::memcmp(string, "function", n)) return KEYWORD_FUNCTION; if (!std::memcmp(string, "NA_real_", n)) return KEYWORD_NA_real_; } else if (n == 11) { if (!std::memcmp(string, "NA_integer_", n)) return KEYWORD_NA_integer_; if (!std::memcmp(string, "NA_complex_", n)) return KEYWORD_NA_complex_; } else if (n == 13) { if (!std::memcmp(string, "NA_character_", n)) return KEYWORD_NA_character_; } return SYMBOL; } inline TokenType symbolType(const std::string& symbol) { return symbolType(symbol.data(), symbol.size()); } } // namespace tokens } // namespace sourcetools #endif /* SOURCETOOLS_TOKENIZATION_REGISTRATION_H */ sourcetools/inst/include/sourcetools/core/0000755000176200001440000000000013267770776020572 5ustar liggesuserssourcetools/inst/include/sourcetools/core/core.h0000644000176200001440000000025713267770776021677 0ustar liggesusers#ifndef SOURCETOOLS_CORE_CORE_H #define SOURCETOOLS_CORE_CORE_H #include #include #endif /* SOURCETOOLS_CORE_CORE_H */ sourcetools/inst/include/sourcetools/core/macros.h0000644000176200001440000000332013267770776022225 0ustar liggesusers#ifndef SOURCETOOLS_CORE_MACROS_H #define SOURCETOOLS_CORE_MACROS_H #include #include #include /* Utility */ #ifdef __GNUC__ # define LIKELY(x) __builtin_expect(!!(x), 1) # define UNLIKELY(x) __builtin_expect(!!(x), 0) #else # define LIKELY(x) x # define UNLIKELY(x) x #endif #define SOURCE_TOOLS_CHECK_MASK(__SELF__, __MASK__) \ ((__MASK__ & __SELF__) == __MASK__) #define SOURCE_TOOLS_LOWER_BITS(__VALUE__, __BITS__) \ (((1 << __BITS__) - 1) & __VALUE__) #define SOURCE_TOOLS_PASTE(__X__, __Y__) __X__ ## __Y__ #define SOURCE_TOOLS_STRINGIFY(__X__) #__X__ /* Logging */ namespace sourcetools { namespace debug { inline std::string shortFilePath(const std::string& filePath) { std::string::size_type index = filePath.find_last_of("/"); if (index != std::string::npos) return filePath.substr(index + 1); return filePath; } inline std::string debugPosition(const char* filePath, int line) { static const int N = 1024; char buffer[N + 1]; std::string shortPath = shortFilePath(filePath); if (shortPath.size() > N / 2) shortPath = shortPath.substr(0, N / 2); std::sprintf(buffer, "[%s:%4i]", shortPath.c_str(), line); return buffer; } } // namespace debug } // namespace sourcetools // Flip on/off as necessary #define SOURCE_TOOLS_ENABLE_DEBUG_LOGGING #ifdef SOURCE_TOOLS_ENABLE_DEBUG_LOGGING #include #define DEBUG(__X__) \ std::cerr << ::sourcetools::debug::debugPosition(__FILE__, __LINE__) \ << ": " << __X__ << ::std::endl; #define DEBUG_BLOCK(x) #else #define DEBUG(x) #define DEBUG_BLOCK(x) if 
(false) #endif #endif /* SOURCETOOLS_CORE_MACROS_H */ sourcetools/inst/include/sourcetools/core/util.h0000644000176200001440000000465113267770776021726 0ustar liggesusers#ifndef SOURCETOOLS_CORE_UTIL_H #define SOURCETOOLS_CORE_UTIL_H #include #include #include #include namespace sourcetools { namespace detail { class noncopyable { protected: noncopyable() {} ~noncopyable() {} private: noncopyable(const noncopyable&); noncopyable& operator=(const noncopyable&); }; } // namespace detail typedef detail::noncopyable noncopyable; template class scoped_ptr : noncopyable { public: explicit scoped_ptr(T* pData) : pData_(pData) {} T& operator*() const { return *pData_; } T* operator->() const { return pData_; } operator T*() const { return pData_; } ~scoped_ptr() { delete pData_; } private: T* pData_; }; template class scoped_array : noncopyable { public: explicit scoped_array(T* pData) : pData_(pData) {} T& operator*() const { return *pData_; } T* operator->() const { return pData_; } operator T*() const { return pData_; } ~scoped_array() { delete[] pData_; } private: T* pData_; }; namespace utils { inline bool isWhitespace(char ch) { return ch == ' ' || ch == '\f' || ch == '\r' || ch == '\n' || ch == '\t' || ch == '\v'; } template inline bool countWhitespaceBytes(const char* data, T* pBytes) { T bytes = 0; while (isWhitespace(*data)) { ++data; ++bytes; } *pBytes = bytes; return bytes != 0; } inline bool isDigit(char ch) { return (ch >= '0' && ch <= '9'); } inline bool isAlphabetic(char ch) { return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z'); } inline bool isAlphaNumeric(char ch) { return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || (ch >= '0' && ch <= '9'); } inline bool isHexDigit(char ch) { return (ch >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F'); } inline bool isValidForStartOfRSymbol(char ch) { return isAlphabetic(ch) || ch == '.' || static_cast(ch) < 0; } inline bool isValidForRSymbol(char ch) { return isAlphaNumeric(ch) || ch == '.' 
|| ch == '_' || static_cast(ch) < 0; } inline std::string escape(char ch) { switch (ch) { case '\r': return "\\r"; case '\n': return "\\n"; case '\t': return "\\t"; default: return std::string(1, ch); } } } // namespace utils } // namespace sourcetools #endif /* SOURCETOOLS_CORE_UTIL_H */ sourcetools/inst/include/sourcetools/tests/0000755000176200001440000000000013267770776021004 5ustar liggesuserssourcetools/inst/include/sourcetools/tests/testthat.h0000644000176200001440000000056413267770776023022 0ustar liggesusers#ifndef SOURCETOOLS_TESTS_TESTTHAT_H #define SOURCETOOLS_TESTS_TESTTHAT_H // disable testthat with older gcc #if defined(__GNUC__) && defined(__GNUC_MINOR__) && !defined(__clang__) # if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 6) # define TESTTHAT_DISABLED # endif #endif // include testthat.h #include #endif /* SOURCETOOLS_TESTS_TESTTHAT_H */ sourcetools/inst/include/sourcetools/read/0000755000176200001440000000000013267770776020555 5ustar liggesuserssourcetools/inst/include/sourcetools/read/posix/0000755000176200001440000000000013267770776021717 5ustar liggesuserssourcetools/inst/include/sourcetools/read/posix/MemoryMappedConnection.h0000644000176200001440000000207513267770776026513 0ustar liggesusers#ifndef SOURCETOOLS_READ_POSIX_MEMORY_MAPPED_CONNECTION_H #define SOURCETOOLS_READ_POSIX_MEMORY_MAPPED_CONNECTION_H #include #include #include #include namespace sourcetools { namespace detail { class MemoryMappedConnection { public: MemoryMappedConnection(int fd, std::size_t size) : size_(size) { #ifdef MAP_POPULATE map_ = (char*) ::mmap(0, size, PROT_READ, MAP_SHARED | MAP_POPULATE, fd, 0); #else map_ = (char*) ::mmap(0, size, PROT_READ, MAP_SHARED, fd, 0); #endif #if defined(POSIX_MADV_SEQUENTIAL) && defined(POSIX_MADV_WILLNEED) ::posix_madvise((void*) map_, size, POSIX_MADV_SEQUENTIAL | POSIX_MADV_WILLNEED); #endif } ~MemoryMappedConnection() { if (map_ != MAP_FAILED) ::munmap(map_, size_); } bool open() { return map_ != MAP_FAILED; } operator char*() const { return map_; } private: char* map_; std::size_t size_; }; } // namespace detail } // namespace sourcetools #endif /* SOURCETOOLS_READ_POSIX_MEMORY_MAPPED_CONNECTION_H */ sourcetools/inst/include/sourcetools/read/posix/FileConnection.h0000644000176200001440000000153113267770776024767 0ustar liggesusers#ifndef SOURCETOOLS_READ_POSIX_FILE_CONNECTION_H #define SOURCETOOLS_READ_POSIX_FILE_CONNECTION_H #include #include #include #include namespace sourcetools { namespace detail { class FileConnection { public: typedef int FileDescriptor; FileConnection(const char* path, int flags = O_RDONLY) { fd_ = ::open(path, flags); } ~FileConnection() { if (open()) ::close(fd_); } bool open() { return fd_ != -1; } bool size(std::size_t* pSize) { struct stat info; if (::fstat(fd_, &info) == -1) return false; *pSize = info.st_size; return true; } operator FileDescriptor() const { return fd_; } private: FileDescriptor fd_; }; } // namespace detail } // namespace sourcetools #endif /* SOURCETOOLS_READ_POSIX_FILE_CONNECTION_H */ sourcetools/inst/include/sourcetools/read/MemoryMappedReader.h0000644000176200001440000000547513267770776024463 0ustar liggesusers#ifndef SOURCETOOLS_READ_MEMORY_MAPPED_READER_H #define SOURCETOOLS_READ_MEMORY_MAPPED_READER_H #include #include #include #include #include #include #ifndef _WIN32 # include # include #else # include # include #endif namespace sourcetools { namespace detail { class MemoryMappedReader { public: class VectorReader { public: explicit VectorReader(std::vector* pData) : pData_(pData) { 
} template void operator()(const T& lhs, const T& rhs) { pData_->push_back(std::string(lhs, rhs)); } private: std::vector* pData_; }; static bool read(const char* path, std::string* pContent) { // Open file connection FileConnection conn(path); if (!conn.open()) return false; // Get size of file std::size_t size; if (!conn.size(&size)) return false; // Early return for empty files if (UNLIKELY(size == 0)) return true; // mmap the file MemoryMappedConnection map(conn, size); if (!map.open()) return false; pContent->assign(map, size); return true; } template static bool read_lines(const char* path, F f) { FileConnection conn(path); if (!conn.open()) return false; // Get size of file std::size_t size; if (!conn.size(&size)) return false; // Early return for empty files if (UNLIKELY(size == 0)) return true; // mmap the file MemoryMappedConnection map(conn, size); if (!map.open()) return false; // special case: just a '\n' bool endsWithNewline = map[size - 1] == '\n'; if (size == 1 && endsWithNewline) return true; // Search for newlines const char* lower = map; const char* upper = map; const char* end = map + size; while (true) { upper = std::find(lower, end, '\n'); if (upper == end) break; // Handle '\r\n' int CR = *(upper - 1) == '\r'; upper -= CR; // Pass to functor f(lower, upper); // Update lower = upper + 1 + CR; } // If this file ended with a newline, we're done if (endsWithNewline) return true; // Otherwise, consume one more string, then we're done f(lower, end); return true; } static bool read_lines(const char* path, std::vector* pContent) { VectorReader reader(pContent); return read_lines(path, reader); } }; } // namespace detail } // namespace sourcetools #endif /* SOURCETOOLS_READ_MEMORY_MAPPED_READER_H */ sourcetools/inst/include/sourcetools/read/read.h0000644000176200001440000000112113045713105021605 0ustar liggesusers#ifndef SOURCETOOLS_READ_READ_H #define SOURCETOOLS_READ_READ_H #include #include #include namespace sourcetools { inline bool read(const std::string& absolutePath, std::string* pContent) { return detail::MemoryMappedReader::read(absolutePath.c_str(), pContent); } inline bool read_lines(const std::string& absolutePath, std::vector* pLines) { return detail::MemoryMappedReader::read_lines(absolutePath.c_str(), pLines); } } // namespace sourcetools #endif /* SOURCETOOLS_READ_READ_H */ sourcetools/inst/include/sourcetools/read/windows/0000755000176200001440000000000013267770776022247 5ustar liggesuserssourcetools/inst/include/sourcetools/read/windows/MemoryMappedConnection.h0000644000176200001440000000172613267770776027045 0ustar liggesusers#ifndef SOURCETOOLS_READ_WINDOWS_MEMORY_MAPPED_CONNECTION_H #define SOURCETOOLS_READ_WINDOWS_MEMORY_MAPPED_CONNECTION_H #undef Realloc #undef Free #include namespace sourcetools { namespace detail { class MemoryMappedConnection { public: MemoryMappedConnection(HANDLE handle, std::size_t size) : map_(NULL), size_(size) { handle_ = ::CreateFileMapping(handle, NULL, PAGE_READONLY, 0, 0, NULL); if (handle_ == NULL) return; map_ = (char*) ::MapViewOfFile(handle_, FILE_MAP_READ, 0, 0, size); } ~MemoryMappedConnection() { if (handle_ != INVALID_HANDLE_VALUE) ::CloseHandle(handle_); if (map_ != NULL) ::UnmapViewOfFile(map_); } bool open() { return map_ != NULL; } operator char*() const { return map_; } private: char* map_; std::size_t size_; HANDLE handle_; }; } // namespace detail } // namespace sourcetools #endif /* SOURCETOOLS_READ_WINDOWS_MEMORY_MAPPED_CONNECTION_H */ 
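A minimal usage sketch of the reader API declared in read/read.h above; the file path is a placeholder, the lines vector is assumed to hold std::string (as produced by VectorReader), and it assumes the package's inst/include directory is on the compiler's include path:

#include <iostream>
#include <string>
#include <vector>

#include <sourcetools/read/read.h>

int main()
{
  // Read a whole file into a single string via the memory-mapped reader.
  std::string contents;
  if (!sourcetools::read("/path/to/script.R", &contents))
    return 1;

  // Read the same file line by line ('\r\n' and '\n' endings are both handled).
  std::vector<std::string> lines;
  if (!sourcetools::read_lines("/path/to/script.R", &lines))
    return 1;

  std::cout << "Read " << lines.size() << " lines.\n";
  return 0;
}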
sourcetools/inst/include/sourcetools/read/windows/FileConnection.h0000644000176200001440000000154613267770776025325 0ustar liggesusers#ifndef SOURCETOOLS_READ_WINDOWS_FILE_CONNECTION_H #define SOURCETOOLS_READ_WINDOWS_FILE_CONNECTION_H #undef Realloc #undef Free #include namespace sourcetools { namespace detail { class FileConnection { public: typedef HANDLE FileDescriptor; FileConnection(const char* path, int flags = GENERIC_READ) { handle_ = ::CreateFile(path, flags, FILE_SHARE_READ, NULL, OPEN_EXISTING, 0, NULL); } ~FileConnection() { if (open()) ::CloseHandle(handle_); } bool open() { return handle_ != INVALID_HANDLE_VALUE; } bool size(std::size_t* pSize) { *pSize = ::GetFileSize(handle_, NULL); return true; } operator FileDescriptor() const { return handle_; } private: FileDescriptor handle_; }; } // namespace detail } // namespace sourcetools #endif /* SOURCETOOLS_READ_WINDOWS_FILE_CONNECTION_H */ sourcetools/inst/include/sourcetools/collection/0000755000176200001440000000000013267770776021775 5ustar liggesuserssourcetools/inst/include/sourcetools/collection/Position.h0000644000176200001440000000311313267770776023750 0ustar liggesusers#ifndef SOURCETOOLS_COLLECTION_POSITION_H #define SOURCETOOLS_COLLECTION_POSITION_H #include #include namespace sourcetools { namespace collections { struct Position { Position() : row(0), column(0) { } Position(std::size_t row, std::size_t column) : row(row), column(column) { } friend std::ostream& operator<<(std::ostream& os, const Position& position) { os << position.row << ":" << position.column; return os; } friend bool operator <(const Position& lhs, const Position& rhs) { return lhs.row < rhs.row || (lhs.row == rhs.row && lhs.column < rhs.column); } friend bool operator <=(const Position& lhs, const Position& rhs) { return lhs.row < rhs.row || (lhs.row == rhs.row && lhs.column <= rhs.column); } friend bool operator ==(const Position& lhs, const Position& rhs) { return lhs.row == rhs.row && lhs.column == rhs.column; } friend bool operator >(const Position& lhs, const Position& rhs) { return lhs.row > rhs.row || (lhs.row == rhs.row && lhs.column > rhs.column); } friend bool operator >=(const Position& lhs, const Position& rhs) { return lhs.row > rhs.row || (lhs.row == rhs.row && lhs.column >= rhs.column); } friend Position operator +(const Position& lhs, std::size_t rhs) { return Position(lhs.row, lhs.column + rhs); } std::size_t row; std::size_t column; }; } // namespace collections } // namespace sourcetools #endif /* SOURCETOOLS_COLLECTION_POSITION_H */ sourcetools/inst/include/sourcetools/collection/Range.h0000644000176200001440000000130513045713105023152 0ustar liggesusers#ifndef SOURCETOOLS_COLLECTION_RANGE_H #define SOURCETOOLS_COLLECTION_RANGE_H #include #include namespace sourcetools { namespace collections { class Range { public: Range(const Position& start, const Position& end) : start_(start), end_(end) { } friend std::ostream& operator <<(std::ostream& os, const Range& range) { os << "[" << range.start() << "-" << range.end() << "]"; return os; } const Position start() const { return start_; } const Position end() const { return end_; } private: Position start_; Position end_; }; } // namespace collections } // namespace sourcetools #endif /* SOURCETOOLS_COLLECTION_RANGE_H */ sourcetools/inst/include/sourcetools/collection/collection.h0000644000176200001440000000034213045713105024251 0ustar liggesusers#ifndef SOURCETOOLS_COLLECTION_COLLECTION_H #define SOURCETOOLS_COLLECTION_COLLECTION_H #include #include #endif /* 
SOURCETOOLS_COLLECTION_COLLECTION_H */ sourcetools/inst/include/sourcetools/utf8/0000755000176200001440000000000013267770776020530 5ustar liggesuserssourcetools/inst/include/sourcetools/utf8/utf8.h0000644000176200001440000000347413267770776021577 0ustar liggesusers#ifndef SOURCETOOLS_UTF8_UTF8_H #define SOURCETOOLS_UTF8_UTF8_H #include #include namespace sourcetools { namespace utf8 { namespace detail { static const unsigned char mask[] = { 0, // 00000000 0x7F, // 01111111 0x1F, // 00011111 0x0F, // 00001111 0x07, // 00000111 0x03, // 00000011 0x01 // 00000001 }; } // namespace detail class iterator { public: iterator(const char* data) : data_(reinterpret_cast(data)), offset_(0) { } iterator(const iterator& other) : data_(other.data_), offset_(other.offset_) { } wchar_t operator*() { std::size_t n = size(); if (n == 0 || n > 6) return -1; const unsigned char* it = data_ + offset_; wchar_t ch = (*it++) & detail::mask[n]; for (std::size_t i = 1; i < n; ++i) { ch <<= 6; ch |= (*it++) & 0x3F; } return ch; } iterator& operator++() { offset_ += size(); return *this; } iterator operator++(int) { iterator copy(*this); operator++(); return copy; } bool operator==(const iterator& it) { return data_ + offset_ == it.data_ + it.offset_; } bool operator!=(const iterator& it) { return data_ + offset_ != it.data_ + it.offset_; } private: int size() { unsigned char ch = data_[offset_]; if (ch == 0) return 0; else if (ch < 192) return 1; else if (ch < 224) return 2; else if (ch < 240) return 3; else if (ch < 248) return 4; else if (ch < 252) return 5; else if (ch < 254) return 6; // TODO: on error? return 1; } private: const unsigned char* data_; std::size_t offset_; }; } // namespace utf8 } // namespace sourcetools #endif /* SOURCETOOLS_UTF8_UTF8_H */ sourcetools/inst/include/sourcetools/platform/0000755000176200001440000000000013267770776021466 5ustar liggesuserssourcetools/inst/include/sourcetools/platform/platform.h0000644000176200001440000000061013267770776023460 0ustar liggesusers#ifndef SOURCETOOLS_PLATFORM_PLATFORM_H #define SOURCETOOLS_PLATFORM_PLATFORM_H #ifdef _WIN32 # define SOURCETOOLS_PLATFORM_WINDOWS #endif #ifdef __APPLE__ # define SOURCETOOLS_PLATFORM_MACOS #endif #ifdef __linux__ # define SOURCETOOLS_PLATFORM_LINUX #endif #if defined(__sun) && defined(__SVR4) # define SOURCETOOLS_PLATFORM_SOLARIS #endif #endif /* SOURCETOOLS_PLATFORM_PLATFORM_H */ sourcetools/inst/include/sourcetools/multibyte/0000755000176200001440000000000013045713105021631 5ustar liggesuserssourcetools/inst/include/sourcetools/multibyte/multibyte.h0000644000176200001440000000133113045713105024016 0ustar liggesusers#ifndef SOURCETOOLS_MULTIBYTE_MULTIBYTE_H #define SOURCETOOLS_MULTIBYTE_MULTIBYTE_H #include #include namespace sourcetools { namespace multibyte { template inline bool countWhitespaceBytes(const char* data, T* pBytes) { wchar_t ch; T bytes = 0; const char* it = data; while (true) { int status = std::mbtowc(&ch, it, MB_CUR_MAX); if (status == 0) { break; } else if (status == -1) { break; } if (!std::iswspace(ch)) break; bytes += status; it += status; } *pBytes = bytes; return bytes != 0; } } // namespace multibyte } // namespace sourcetools #endif /* SOURCETOOLS_MULTIBYTE_MULTIBYTE_H */ sourcetools/inst/include/sourcetools/cursor/0000755000176200001440000000000013267771000021134 5ustar liggesuserssourcetools/inst/include/sourcetools/cursor/cursor.h0000644000176200001440000000031213045713105022612 0ustar liggesusers#ifndef SOURCETOOLS_CURSOR_CURSOR_H #define SOURCETOOLS_CURSOR_CURSOR_H #include #include #endif 
/* SOURCETOOLS_CURSOR_CURSOR_H */ sourcetools/inst/include/sourcetools/cursor/TokenCursor.h0000644000176200001440000001454613267771000023575 0ustar liggesusers#ifndef SOURCETOOLS_CURSOR_TOKEN_CURSOR_H #define SOURCETOOLS_CURSOR_TOKEN_CURSOR_H #include #include #include #include namespace sourcetools { namespace cursors { class TokenCursor { private: typedef collections::Position Position; typedef tokens::Token Token; public: TokenCursor(const std::vector& tokens) : tokens_(tokens), offset_(0), n_(tokens.size()), noSuchToken_(tokens::END) {} bool moveToNextToken() { if (UNLIKELY(offset_ >= n_ - 1)) return false; ++offset_; return true; } bool moveToNextSignificantToken() { if (!moveToNextToken()) return false; if (!fwdOverWhitespaceAndComments()) return false; return true; } bool moveToPreviousToken() { if (UNLIKELY(offset_ == 0)) return false; --offset_; return true; } bool moveToPreviousSignificantToken() { if (!moveToPreviousToken()) return false; if (!bwdOverWhitespaceAndComments()) return false; return true; } const Token& peekFwd(std::size_t offset = 1) const { std::size_t index = offset_ + offset; if (UNLIKELY(index >= n_)) return noSuchToken_; return tokens_[index]; } const Token& peekBwd(std::size_t offset = 1) const { if (UNLIKELY(offset > offset_)) return noSuchToken_; std::size_t index = offset_ - offset; return tokens_[index]; } const Token& currentToken() const { if (UNLIKELY(offset_ >= n_)) return noSuchToken_; return tokens_[offset_]; } operator const Token&() const { return currentToken(); } bool fwdOverWhitespace() { while (isType(tokens::WHITESPACE)) if (!moveToNextToken()) return false; return true; } bool bwdOverWhitespace() { while (isType(tokens::WHITESPACE)) if (!moveToPreviousToken()) return false; return true; } bool fwdOverComments() { while (isType(tokens::COMMENT)) if (!moveToNextToken()) return false; return true; } bool bwdOverComments() { while (isType(tokens::COMMENT)) if (!moveToPreviousToken()) return false; return true; } bool fwdOverWhitespaceAndComments() { while (isType(tokens::COMMENT) || isType(tokens::WHITESPACE)) if (!moveToNextToken()) return false; return true; } bool bwdOverWhitespaceAndComments() { while (isType(tokens::COMMENT) || isType(tokens::WHITESPACE)) if (!moveToPreviousToken()) return false; return true; } const Token& nextSignificantToken(std::size_t times = 1) const { TokenCursor clone(*this); for (std::size_t i = 0; i < times; ++i) clone.moveToNextSignificantToken(); return clone; } const Token& previousSignificantToken(std::size_t times = 1) const { TokenCursor clone(*this); for (std::size_t i = 0; i < times; ++i) clone.moveToPreviousSignificantToken(); return clone; } bool moveToPosition(std::size_t row, std::size_t column) { return moveToPosition(Position(row, column)); } bool moveToPosition(const Position& target) { if (UNLIKELY(n_ == 0)) return false; if (UNLIKELY(tokens_[n_ - 1].position() <= target)) { offset_ = n_ - 1; return true; } std::size_t start = 0; std::size_t end = n_; std::size_t offset = 0; while (true) { offset = (start + end) / 2; const Position& current = tokens_[offset].position(); if (current == target || start == end) break; else if (current < target) start = offset + 1; else end = offset - 1; } offset_ = offset; return true; } template bool findFwd(F f) { do { if (f(this)) return true; } while (moveToNextToken()); return false; } template bool findBwd(F f) { do { if (f(this)) return true; } while (moveToPreviousToken()); return false; } bool findFwd(const char* contents) { return 
findFwd(std::string(contents, std::strlen(contents))); } bool findFwd(const std::string& contents) { do { if (currentToken().contentsEqual(contents)) return true; } while (moveToNextToken()); return false; } bool findBwd(const char* contents) { return findBwd(std::string(contents, std::strlen(contents))); } bool findBwd(const std::string& contents) { do { if (currentToken().contentsEqual(contents)) return true; } while (moveToPreviousToken()); return false; } bool fwdToMatchingBracket() { using namespace tokens; if (!isLeftBracket(currentToken())) return false; TokenType lhs = currentToken().type(); TokenType rhs = complement(lhs); std::size_t balance = 1; while (moveToNextSignificantToken()) { TokenType type = currentToken().type(); balance += type == lhs; balance -= type == rhs; if (balance == 0) return true; } return false; } bool bwdToMatchingBracket() { using namespace tokens; if (!isRightBracket(currentToken())) return false; TokenType lhs = currentToken().type(); TokenType rhs = complement(lhs); std::size_t balance = 1; while (moveToPreviousSignificantToken()) { TokenType type = currentToken().type(); balance += type == lhs; balance -= type == rhs; if (balance == 0) return true; } return false; } friend std::ostream& operator<<(std::ostream& os, const TokenCursor& cursor) { return os << toString(cursor.currentToken()); } tokens::TokenType type() const { return currentToken().type(); } bool isType(tokens::TokenType type) const { return currentToken().isType(type); } collections::Position position() const { return currentToken().position(); } std::size_t offset() const { return offset_; } std::size_t row() const { return currentToken().row(); } std::size_t column() const { return currentToken().column(); } private: const std::vector& tokens_; std::size_t offset_; std::size_t n_; Token noSuchToken_; }; } // namespace cursors inline std::string toString(const cursors::TokenCursor& cursor) { return toString(cursor.currentToken()); } } // namespace sourcetools #endif /* SOURCETOOLS_CURSOR_TOKEN_CURSOR_H */ sourcetools/inst/include/sourcetools/cursor/TextCursor.h0000644000176200001440000000256713267770776023464 0ustar liggesusers#ifndef SOURCETOOLS_CURSOR_TEXT_CURSOR_H #define SOURCETOOLS_CURSOR_TEXT_CURSOR_H #include #include #include namespace sourcetools { namespace cursors { class TextCursor { public: TextCursor(const char* text, std::size_t n) : text_(text), n_(n), offset_(0), position_(0, 0) { } char peek(std::size_t offset = 0) { std::size_t index = offset_ + offset; if (UNLIKELY(index >= n_)) return '\0'; return text_[index]; } void advance(std::size_t times = 1) { for (std::size_t i = 0; i < times; ++i) { if (peek() == '\n') { ++position_.row; position_.column = 0; } else { ++position_.column; } ++offset_; } } operator const char*() const { return text_ + offset_; } std::size_t offset() const { return offset_; } const collections::Position& position() const { return position_; } std::size_t row() const { return position_.row; } std::size_t column() const { return position_.column; } const char* begin() const { return text_; } const char* end() const { return text_ + n_; } private: const char* text_; std::size_t n_; std::size_t offset_; collections::Position position_; }; } // namespace cursors } // namespace sourcetools #endif /* SOURCETOOLS_CURSOR_TEXT_CURSOR_H */ sourcetools/inst/include/sourcetools.h0000644000176200001440000000052413267770776020014 0ustar liggesusers#ifndef SOURCE_TOOLS_H #define SOURCE_TOOLS_H #include #include #include #include #include #include #include 
#endif sourcetools/tests/0000755000176200001440000000000013045713105013774 5ustar liggesuserssourcetools/tests/testthat.R0000644000176200001440000000014013045713105015752 0ustar liggesusersif (require("testthat", quietly = TRUE)) { library(sourcetools) test_check("sourcetools") } sourcetools/tests/testthat/0000755000176200001440000000000013267774041015650 5ustar liggesuserssourcetools/tests/testthat/test-tokenize.R0000644000176200001440000001064513267770776020621 0ustar liggesuserscontext("Tokenizer") compare_tokens <- function(tokens, expected) { if (is.character(tokens)) tokens <- tokenize_string(tokens) expect_true( nrow(tokens) == length(expected), "different number of tokens" ) for (i in 1:nrow(tokens)) { expect_true( tokens$value[[i]] == expected[[i]], paste0("expected token '", tokens$value[[i]], "'; got '", expected[[i]], "'") ) } } test_that("Operators are tokenized correctly", { operators <- c( "::", ":::", "$", "@", "[", "[[", "^", "-", "+", ":", "*", "/", "+", "-", "<", ">", "<=", ">=", "==", "!=", "!", "&", "&&", "|", "||", "~", "->", "->>", "<-", "<<-", "=", "?", "**", "%%", "%for%" ) tokenized <- tokenize_string(paste(operators, collapse = " ")) for (operator in operators) { tokens <- tokenize_string(operator) expect_true(nrow(tokens) == 1, paste("expected a single token ('", operator, "')")) } }) test_that("Numbers are tokenized correctly", { numbers <- c("1", "1.0", "0.1", ".1", "0.1E1", "1L", "1.0L", "1.5L", "1E1", "1E-1", "1E-1L", ".100E-105L", "0.", "100.", "1e+09", "1e+90", "1e-90", "1e-00000000000000009") for (number in numbers) { tokens <- tokenize_string(number) expect_true(nrow(tokens) == 1, paste("expected a single token ('", number, "')", sep = "")) token <- as.list(tokens[1, ]) expect_true(token$type == "number", paste("expected a number ('", token$type, "')", sep = "")) } }) test_that("The tokenizer accepts UTF-8 symbols", { expect_true(nrow(tokenize_string("鬼")) == 1) }) test_that("The tokenizer works correctly", { # TODO: Should newlines be absorbed as part of the comment string? tokens <- tokenize_string("# A Comment\n") expected <- "# A Comment\n" compare_tokens(tokens, expected) tokens <- tokenize_string("a <- 1 + 2\n") compare_tokens( tokens, c("a", " ", "<-", " ", "1", " ", "+", " ", "2", "\n") ) compare_tokens( tokenize_string("a<-1"), c("a", "<-", "1") ) # NOTE: '-' sign tokenized separately from number compare_tokens( tokenize_string("a< -1"), c("a", "<", " ", "-", "1") ) compare_tokens("1.0E5L", "1.0E5L") compare_tokens(".1", ".1") compare_tokens("'\\''", "'\\''") compare_tokens(".a", ".a") compare_tokens("...", "...") compare_tokens(":=", ":=") compare_tokens("x ** 2", c("x", " ", "**", " ", "2")) }) test_that("`[[` and `[` are tokenized correctly", { compare_tokens("x[[1]]", c("x", "[[", "1", "]]")) # not really valid R code, but the tokenizer should still # get it right compare_tokens("[[[]]]", c("[[", "[", "]", "]]")) compare_tokens( "x[[a[b[[c[1]]]]]]", c("x", "[[", "a", "[", "b", "[[", "c", "[", "1", "]", "]]", "]", "]]") ) }) test_that("Failures during number tokenization is detected", { tokens <- tokenize_string("1.5E---") expect_true(tokens$type[[1]] == "invalid") }) test_that("invalid number e.g. 
1E1.5 tokenized as single entity", { tokens <- tokenize_string("1E1.5") expect_true(nrow(tokens) == 1) expect_true(tokens$type[[1]] == "invalid") }) test_that("keywords are tokenized as keywords", { keywords <- c("if", "else", "repeat", "while", "function", "for", "in", "next", "break", "TRUE", "FALSE", "NULL", "Inf", "NaN", "NA", "NA_integer_", "NA_real_", "NA_complex_", "NA_character_") tokens <- lapply(keywords, function(keyword) { tokenize_string(keyword)[1, ] }) types <- unlist(lapply(tokens, `[[`, "type")) expect_true(all(types == "keyword")) }) test_that("comments without a trailing newline are tokenized", { tokens <- tokenize_string("# abc") expect_identical(tokens$type, "comment") }) test_that("tokenization errors handled correctly", { # previously, these reported an error where a NUL # byte was accidentally included as part of the # token value tokenize_string("`abc") tokenize_string("'abc") tokenize_string("\"abc") tokenize_string("%abc") }) test_that("files in packages are tokenized without errors", { skip_on_cran() paths <- list.dirs("~/git", full.names = TRUE, recursive = FALSE) packages <- paths[file.exists(file.path(paths, "DESCRIPTION"))] R <- file.path(packages, "R") for (dir in R) { files <- list.files(dir, pattern = "R$", full.names = TRUE) for (file in files) { tokens <- tokenize_file(file) errors <- tokens$type == "invalid" expect_true(all(errors == FALSE)) } } }) sourcetools/tests/testthat/helper-utf8.R0000644000176200001440000000007213045713105020121 0ustar liggesusersoctal <- "\012" hex <- "\xE2\x99\xA5" utf8 <- "\u2665" sourcetools/tests/testthat/test-read.R0000644000176200001440000000113513045713105017647 0ustar liggesuserscontext("Reader") files <- list.files() test_that("read_lines and readLines agree on output", { for (file in files) { expect_identical( readLines(file), sourcetools::read_lines(file) ) } }) test_that("read and readChar agree on output", { for (file in files) { expect_identical( readChar(file, file.info(file)$size, TRUE), sourcetools::read(file) ) } }) test_that("read_bytes and readBin agree on output", { for (file in files) { expect_identical( readBin(file, "raw", file.info(file)$size), sourcetools::read_bytes(file) ) } }) sourcetools/src/0000755000176200001440000000000013267771672013444 5ustar liggesuserssourcetools/src/Makevars0000644000176200001440000000004113267771673015134 0ustar liggesusersPKG_CPPFLAGS = -I../inst/include sourcetools/src/sourcetools-init.c0000644000176200001440000000207113267771673017133 0ustar liggesusers#include #include #include // for NULL #include /* .Call calls */ extern SEXP sourcetools_read(SEXP); extern SEXP sourcetools_read_bytes(SEXP); extern SEXP sourcetools_read_lines(SEXP); extern SEXP sourcetools_read_lines_bytes(SEXP); extern SEXP sourcetools_tokenize_file(SEXP); extern SEXP sourcetools_tokenize_string(SEXP); static const R_CallMethodDef CallEntries[] = { {"sourcetools_read", (DL_FUNC) &sourcetools_read, 1}, {"sourcetools_read_bytes", (DL_FUNC) &sourcetools_read_bytes, 1}, {"sourcetools_read_lines", (DL_FUNC) &sourcetools_read_lines, 1}, {"sourcetools_read_lines_bytes", (DL_FUNC) &sourcetools_read_lines_bytes, 1}, {"sourcetools_tokenize_file", (DL_FUNC) &sourcetools_tokenize_file, 1}, {"sourcetools_tokenize_string", (DL_FUNC) &sourcetools_tokenize_string, 1}, {NULL, NULL, 0} }; void R_init_sourcetools(DllInfo *dll) { R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); R_useDynamicSymbols(dll, FALSE); } sourcetools/src/Tokenizer.cpp0000644000176200001440000000543113267771673016126 0ustar 
liggesusers#include #define R_NO_REMAP #include #include namespace sourcetools { namespace { void asDataFrame(SEXP listSEXP, int n) { r::Protect protect; SEXP classSEXP = protect(Rf_mkString("data.frame")); Rf_setAttrib(listSEXP, R_ClassSymbol, classSEXP); SEXP rownamesSEXP = protect(Rf_allocVector(INTSXP, 2)); INTEGER(rownamesSEXP)[0] = NA_INTEGER; INTEGER(rownamesSEXP)[1] = -n; Rf_setAttrib(listSEXP, R_RowNamesSymbol, rownamesSEXP); } SEXP asSEXP(const std::vector& tokens) { r::Protect protect; std::size_t n = tokens.size(); SEXP resultSEXP = protect(Rf_allocVector(VECSXP, 4)); // Set vector elements SEXP valueSEXP = protect(Rf_allocVector(STRSXP, n)); SET_VECTOR_ELT(resultSEXP, 0, valueSEXP); for (std::size_t i = 0; i < n; ++i) { const std::string& contents = tokens[i].contents(); SEXP charSEXP = Rf_mkCharLen(contents.c_str(), contents.size()); SET_STRING_ELT(valueSEXP, i, charSEXP); } SEXP rowSEXP = protect(Rf_allocVector(INTSXP, n)); SET_VECTOR_ELT(resultSEXP, 1, rowSEXP); for (std::size_t i = 0; i < n; ++i) INTEGER(rowSEXP)[i] = tokens[i].row() + 1; SEXP columnSEXP = protect(Rf_allocVector(INTSXP, n)); SET_VECTOR_ELT(resultSEXP, 2, columnSEXP); for (std::size_t i = 0; i < n; ++i) INTEGER(columnSEXP)[i] = tokens[i].column() + 1; SEXP typeSEXP = protect(Rf_allocVector(STRSXP, n)); SET_VECTOR_ELT(resultSEXP, 3, typeSEXP); for (std::size_t i = 0; i < n; ++i) { const std::string& type = toString(tokens[i].type()); SEXP charSEXP = Rf_mkCharLen(type.c_str(), type.size()); SET_STRING_ELT(typeSEXP, i, charSEXP); } // Set names SEXP namesSEXP = protect(Rf_allocVector(STRSXP, 4)); SET_STRING_ELT(namesSEXP, 0, Rf_mkChar("value")); SET_STRING_ELT(namesSEXP, 1, Rf_mkChar("row")); SET_STRING_ELT(namesSEXP, 2, Rf_mkChar("column")); SET_STRING_ELT(namesSEXP, 3, Rf_mkChar("type")); Rf_setAttrib(resultSEXP, R_NamesSymbol, namesSEXP); asDataFrame(resultSEXP, n); return resultSEXP; } } // anonymous namespace } // namespace sourcetools extern "C" SEXP sourcetools_tokenize_file(SEXP absolutePathSEXP) { typedef sourcetools::tokens::Token Token; const char* absolutePath = CHAR(STRING_ELT(absolutePathSEXP, 0)); std::string contents; if (!sourcetools::read(absolutePath, &contents)) { Rf_warning("Failed to read file"); return R_NilValue; } const std::vector& tokens = sourcetools::tokenize(contents); return sourcetools::asSEXP(tokens); } extern "C" SEXP sourcetools_tokenize_string(SEXP stringSEXP) { typedef sourcetools::tokens::Token Token; SEXP charSEXP = STRING_ELT(stringSEXP, 0); const std::vector& tokens = sourcetools::tokenize(CHAR(charSEXP), Rf_length(charSEXP)); return sourcetools::asSEXP(tokens); } sourcetools/src/Reader.cpp0000644000176200001440000000454613267771673015364 0ustar liggesusers#include #include #include #define R_NO_REMAP #include #include extern "C" SEXP sourcetools_read(SEXP absolutePathSEXP) { const char* absolutePath = CHAR(STRING_ELT(absolutePathSEXP, 0)); std::string contents; bool result = sourcetools::read(absolutePath, &contents); if (!result) { Rf_warning("Failed to read file"); return R_NilValue; } sourcetools::r::Protect protect; SEXP resultSEXP = protect(Rf_allocVector(STRSXP, 1)); SET_STRING_ELT(resultSEXP, 0, Rf_mkCharLen(contents.c_str(), contents.size())); return resultSEXP; } extern "C" SEXP sourcetools_read_lines(SEXP absolutePathSEXP) { const char* absolutePath = CHAR(STRING_ELT(absolutePathSEXP, 0)); std::vector lines; bool result = sourcetools::read_lines(absolutePath, &lines); if (!result) { Rf_warning("Failed to read file"); return R_NilValue; } std::size_t n = 
lines.size(); sourcetools::r::Protect protect; SEXP resultSEXP = protect(Rf_allocVector(STRSXP, n)); for (std::size_t i = 0; i < n; ++i) SET_STRING_ELT(resultSEXP, i, Rf_mkCharLen(lines[i].c_str(), lines[i].size())); return resultSEXP; } extern "C" SEXP sourcetools_read_bytes(SEXP absolutePathSEXP) { const char* absolutePath = CHAR(STRING_ELT(absolutePathSEXP, 0)); std::string contents; bool result = sourcetools::read(absolutePath, &contents); if (!result) { Rf_warning("Failed to read file"); return R_NilValue; } sourcetools::r::Protect protect; SEXP resultSEXP = protect(Rf_allocVector(RAWSXP, contents.size())); std::memcpy(RAW(resultSEXP), contents.c_str(), contents.size()); return resultSEXP; } extern "C" SEXP sourcetools_read_lines_bytes(SEXP absolutePathSEXP) { const char* absolutePath = CHAR(STRING_ELT(absolutePathSEXP, 0)); std::vector<std::string> lines; bool result = sourcetools::read_lines(absolutePath, &lines); if (!result) { Rf_warning("Failed to read file"); return R_NilValue; } std::size_t n = lines.size(); sourcetools::r::Protect protect; SEXP resultSEXP = protect(Rf_allocVector(VECSXP, n)); for (std::size_t i = 0; i < n; ++i) { SEXP rawSEXP = Rf_allocVector(RAWSXP, lines[i].size()); std::memcpy(RAW(rawSEXP), lines[i].c_str(), lines[i].size()); SET_VECTOR_ELT(resultSEXP, i, rawSEXP); } return resultSEXP; } sourcetools/src/Makevars.win0000644000176200001440000000004113267771673015730 0ustar liggesusersPKG_CPPFLAGS = -I../inst/include sourcetools/NAMESPACE0000644000176200001440000000037613267770776014102 0ustar liggesusers# Generated by roxygen2: do not edit by hand S3method(print,RTokens) export(read) export(read_bytes) export(read_lines) export(read_lines_bytes) export(tokenize) export(tokenize_file) export(tokenize_string) useDynLib(sourcetools, .registration = TRUE) sourcetools/NEWS.md0000644000176200001440000000212013267771126013740 0ustar liggesusers# sourcetools 0.1.7 - Ensure tests pass on platforms where `char` is unsigned. (#21) # sourcetools 0.1.6 - Register native routines. - Ensure that file handles on Windows are properly closed after reading a file. # sourcetools 0.1.5 - Ensure that symbols included from standard library headers are resolved using a `std::` prefix. # sourcetools 0.1.4 - More work to ensure `sourcetools` can build on Solaris. # sourcetools 0.1.3 - Relax C++11 requirement, to ensure that `sourcetools` can build on machines with older compilers (e.g. gcc 4.4). # sourcetools 0.1.2 - Disable failing tests on Solaris. # sourcetools 0.1.1 - Rename token type `ERR` to `INVALID` to fix build errors on Solaris. # sourcetools 0.1.0 ## Features The first release of `sourcetools` comes with a small set of features exposed to R: - `read(file)`: Read a file (as a string). Similar to `readChar()`, but faster (and may be optimized to use a memory mapped file reader in the future). - `tokenize_file(file)`: Tokenize an R script. - `tokenize_string(string)`: Tokenize a string of R code. sourcetools/R/0000755000176200001440000000000013267770776013052 5ustar liggesuserssourcetools/R/sourcetools.R0000644000176200001440000000446713267770776015561 0ustar liggesusers#' @useDynLib sourcetools NULL #' Read the Contents of a File #' #' Read the contents of a file into a string (or, in the case of #' \code{read_lines}, a vector of strings). #' #' @param path A file path.
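#' @examples
#' # A minimal usage sketch; "script.R" is a hypothetical file path,
#' # so the example is not run.
#' \dontrun{
#' contents <- read("script.R")
#' lines <- read_lines("script.R")
#' }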
#' #' @name read #' @rdname read #' @export read <- function(path) { path <- normalizePath(path, mustWork = TRUE) .Call("sourcetools_read", path, PACKAGE = "sourcetools") } #' @name read #' @rdname read #' @export read_lines <- function(path) { path <- normalizePath(path, mustWork = TRUE) .Call("sourcetools_read_lines", path, PACKAGE = "sourcetools") } #' @name read #' @rdname read #' @export read_bytes <- function(path) { path <- normalizePath(path, mustWork = TRUE) .Call("sourcetools_read_bytes", path, PACKAGE = "sourcetools") } #' @name read #' @rdname read #' @export read_lines_bytes <- function(path) { path <- normalizePath(path, mustWork = TRUE) .Call("sourcetools_read_lines_bytes", path, PACKAGE = "sourcetools") } #' Tokenize R Code #' #' Tools for tokenizing \R code. #' #' @param file,path A file path. #' @param text,string \R code as a character vector of length one. #' #' @note Line numbers are determined by existence of the \code{\\n} #' line feed character, under the assumption that code being tokenized #' will use either \code{\\n} to indicate newlines (as on modern #' Unix systems), or \code{\\r\\n} as on Windows. #' #' @return A \code{data.frame} with the following columns: #' #' \tabular{ll}{ #' \code{value} \tab The token's contents, as a string. \cr #' \code{row} \tab The row where the token is located. \cr #' \code{column} \tab The column where the token is located. \cr #' \code{type} \tab The token type, as a string. \cr #' } #' #' @rdname tokenize-methods #' @export #' @examples #' tokenize_string("x <- 1 + 2") tokenize_file <- function(path) { path <- normalizePath(path, mustWork = TRUE) .Call("sourcetools_tokenize_file", path, PACKAGE = "sourcetools") } #' @rdname tokenize-methods #' @export tokenize_string <- function(string) { .Call("sourcetools_tokenize_string", as.character(string), PACKAGE = "sourcetools") } #' @rdname tokenize-methods #' @export tokenize <- function(file = "", text = NULL) { if (is.null(text)) text <- read(file) tokenize_string(text) } #' @export print.RTokens <- function(x, ...) { print.data.frame(x, ...) } sourcetools/R/util.R0000644000176200001440000000016413267770776014163 0ustar liggesuserssearch_objects <- function() { lapply(seq_along(search()), function(i) { ls(pos = i, all.names = TRUE) }) } sourcetools/README.md0000644000176200001440000000437313267771305014134 0ustar liggesusers [![Travis-CI Build Status](https://travis-ci.org/kevinushey/sourcetools.svg?branch=master)](https://travis-ci.org/kevinushey/sourcetools) [![AppVeyor Build Status](https://ci.appveyor.com/api/projects/status/github/kevinushey/sourcetools?branch=master&svg=true)](https://ci.appveyor.com/project/kevinushey/sourcetools) # sourcetools Tools for reading, tokenizing, and (eventually) parsing `R` code. ## Getting Started `sourcetools` is not yet on CRAN -- install with ```r devtools::install_github("kevinushey/sourcetools") ``` ## Reading `sourcetools` comes with a couple fast functions for reading files into `R`. Use `read()` and `read_lines()` to quickly read a file into `R` as character vectors. `read_lines()` handles both Windows style `\r\n` line endings, as well as Unix-style `\n` endings. 
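A minimal usage sketch (the path below is a placeholder for any text file on disk):

```r
# Hypothetical path; substitute a file that exists locally.
path <- "example.R"
contents <- read(path)        # the whole file as a single string
lines    <- read_lines(path)  # a character vector, one element per line
```

The benchmark below compares these readers against their base R counterparts: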
```r text <- replicate(10000, paste(sample(letters, 200, TRUE), collapse = "")) file <- tempfile() cat(text, file = file, sep = "\n") mb <- microbenchmark::microbenchmark(times = 10, readChar = readChar(file, file.info(file)$size, TRUE), readLines = readLines(file), read = read(file), read_lines = read_lines(file) ) print(mb, digits = 3) ``` ``` ## Unit: milliseconds ## expr min lq mean median uq max neval cld ## readChar 5.2 6.54 10.5 7.02 8.73 36.56 10 ab ## readLines 155.9 159.69 162.4 161.95 163.15 171.76 10 c ## read 5.3 5.48 6.5 5.97 7.52 9.35 10 a ## read_lines 13.5 13.95 14.4 14.09 14.50 16.97 10 b ``` ```r unlink(file) ``` ## Tokenization `sourcetools` provides the `tokenize_string()` and `tokenize_file()` functions for generating a tokenized representation of R code. These produce 'raw' tokenized representations of the code, with each token's value as a string, and a recorded row, column, and type: ```r tokenize_string("if (x < 10) 20") ``` ``` ## value row column type ## 1 if 1 1 keyword ## 2 1 3 whitespace ## 3 ( 1 4 bracket ## 4 x 1 5 symbol ## 5 1 6 whitespace ## 6 < 1 7 operator ## 7 1 8 whitespace ## 8 10 1 9 number ## 9 ) 1 11 bracket ## 10 1 12 whitespace ## 11 20 1 13 number ``` sourcetools/MD50000644000176200001440000000642613267774041013166 0ustar liggesusers6de977696a44f556eabaaec5bcbbac51 *DESCRIPTION 472904db5a93a07692b3fe24cabcf96c *LICENSE ce97ebaad581d3ce2c897cdcffa1d66c *NAMESPACE 90b92a7f767af179e20f200b0a3f6cf5 *NEWS.md 37e5f77d6f49acabdf3daef30abe34f7 *R/sourcetools.R d33c3c2ece7b73587126bae7e012a549 *R/util.R d82a27087d6f3fac9d06978a31640aed *README.md c5215eb66349006d73ad7e65ce54046b *inst/include/sourcetools.h f071c0148a4629ea8150b63d2627cf0c *inst/include/sourcetools/collection/Position.h 019b4182b5d6b9700562f8d3b90ac1f4 *inst/include/sourcetools/collection/Range.h d11b4138653828a197304b1cb692c614 *inst/include/sourcetools/collection/collection.h d77f935ab3b7da52405a501f2404d18e *inst/include/sourcetools/core/core.h 9ad041bb1ce4251ab7afccee1ac6de3d *inst/include/sourcetools/core/macros.h a75b84b3f91f4ab540aac42267e860ad *inst/include/sourcetools/core/util.h 353bc7e4e2bf9b62e301243a8a631e52 *inst/include/sourcetools/cursor/TextCursor.h 81b2ef5fe49d6e2822e8703ede20414c *inst/include/sourcetools/cursor/TokenCursor.h 42d2f27e9ae85211cd5542eaecc7c37b *inst/include/sourcetools/cursor/cursor.h 973781254e2b3ae94eb0770554efae33 *inst/include/sourcetools/multibyte/multibyte.h 63818672820bc3a620d09e67c68af7c2 *inst/include/sourcetools/platform/platform.h f10cddd374f1c671456a4120496a4291 *inst/include/sourcetools/r/RCallRecurser.h 78a60ba9c51951eb8b4211a0bd8bc998 *inst/include/sourcetools/r/RConverter.h 5d890077972c2a4eb07c98aafc63690c *inst/include/sourcetools/r/RFunctions.h eab3a99f83f6bfa7b7cd0b7e8072edaa *inst/include/sourcetools/r/RHeaders.h dc5e82ab54673bd892e6b420896f101b *inst/include/sourcetools/r/RNonStandardEvaluation.h 1e2951a40e7692881c7ab4645796a9ee *inst/include/sourcetools/r/RUtils.h da17972b93e9b4e91554f705b4cda985 *inst/include/sourcetools/r/r.h 8ea2ac860e800d20143be7792483ba7c *inst/include/sourcetools/read/MemoryMappedReader.h 8096eb102d9679287cc8fa705d3a21da *inst/include/sourcetools/read/posix/FileConnection.h e0147869348d196193c13e89762670f0 *inst/include/sourcetools/read/posix/MemoryMappedConnection.h 44abbae26e9ab704c92a9ce6c98ba33a *inst/include/sourcetools/read/read.h e77d3eb6a47db7e6d1e65f59eb3ab2c5 *inst/include/sourcetools/read/windows/FileConnection.h 35b83b8a537f7c447410e3c5e6616cef 
*inst/include/sourcetools/read/windows/MemoryMappedConnection.h b80180f53ca809b9ba6d4e7df6316e0b *inst/include/sourcetools/tests/testthat.h c6afafc697b747111348dabb88d9fb4a *inst/include/sourcetools/tokenization/Registration.h 9ab2cf85d30171f4744d21d10c6cd532 *inst/include/sourcetools/tokenization/Token.h 6599c5daf2f3e59861153982cc00efef *inst/include/sourcetools/tokenization/Tokenizer.h 00c91c6e20bc534fa3a689c79770f46c *inst/include/sourcetools/tokenization/tokenization.h 1491ededa24449d40554757c96bebaf0 *inst/include/sourcetools/utf8/utf8.h 3005e918c6f7dbf54993a04b74ca9e54 *man/read.Rd a94108446e930c7c488c695e1618f049 *man/tokenize-methods.Rd 3f03da795dd26373156bddc78d41e95d *src/Makevars 3f03da795dd26373156bddc78d41e95d *src/Makevars.win 43927b22e2812e5ba35419390a7933ae *src/Reader.cpp 78946eee022f534743af918d651dd244 *src/Tokenizer.cpp f4bd98cee0ca584880a0cac98f48d527 *src/sourcetools-init.c 175dc27564828d1abeb87bc004d02266 *tests/testthat.R b6ba9001993894a2085c981a6c58018d *tests/testthat/helper-utf8.R 0e31fb15ea8b66d310162f60c434ed7d *tests/testthat/test-read.R 23eb599dfa50f8915e1448b88e1719a4 *tests/testthat/test-tokenize.R sourcetools/DESCRIPTION0000644000176200001440000000131613267774041014355 0ustar liggesusersPackage: sourcetools Type: Package Title: Tools for Reading, Tokenizing and Parsing R Code Version: 0.1.7 Author: Kevin Ushey Maintainer: Kevin Ushey Description: Tools for the reading and tokenization of R code. The 'sourcetools' package provides both an R and C++ interface for the tokenization of R code, and helpers for interacting with the tokenized representation of R code. License: MIT + file LICENSE LazyData: TRUE Depends: R (>= 3.0.2) Suggests: testthat RoxygenNote: 5.0.1 BugReports: https://github.com/kevinushey/sourcetools/issues Encoding: UTF-8 NeedsCompilation: yes Packaged: 2018-04-25 03:19:22 UTC; kevin Repository: CRAN Date/Publication: 2018-04-25 03:38:09 UTC sourcetools/man/0000755000176200001440000000000013267770776013434 5ustar liggesuserssourcetools/man/read.Rd0000644000176200001440000000071113267770776014635 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/sourcetools.R \name{read} \alias{read} \alias{read_bytes} \alias{read_lines} \alias{read_lines_bytes} \title{Read the Contents of a File} \usage{ read(path) read_lines(path) read_bytes(path) read_lines_bytes(path) } \arguments{ \item{path}{A file path.} } \description{ Read the contents of a file into a string (or, in the case of \code{read_lines}, a vector of strings). } sourcetools/man/tokenize-methods.Rd0000644000176200001440000000204413267770776017214 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/sourcetools.R \name{tokenize_file} \alias{tokenize} \alias{tokenize_file} \alias{tokenize_string} \title{Tokenize R Code} \usage{ tokenize_file(path) tokenize_string(string) tokenize(file = "", text = NULL) } \arguments{ \item{file, path}{A file path.} \item{text, string}{\R code as a character vector of length one.} } \value{ A \code{data.frame} with the following columns: \tabular{ll}{ \code{value} \tab The token's contents, as a string. \cr \code{row} \tab The row where the token is located. \cr \code{column} \tab The column where the token is located. \cr \code{type} \tab The token type, as a string. \cr } } \description{ Tools for tokenizing \R code. 
} \note{ Line numbers are determined by existence of the \code{\\n} line feed character, under the assumption that code being tokenized will use either \code{\\n} to indicate newlines (as on modern Unix systems), or \code{\\r\\n} as on Windows. } \examples{ tokenize_string("x <- 1 + 2") } sourcetools/LICENSE0000644000176200001440000000005613267770776013667 0ustar liggesusersYEAR: 2015-2016 COPYRIGHT HOLDER: Kevin Ushey