--- liquid-c-4.0.0/.gitignore ---

*.gem
*.rbc
Gemfile.lock
pkg
tmp
*.o
*.bundle
ext/*/Makefile
*.so

--- liquid-c-4.0.0/.travis.yml ---

language: ruby
rvm:
  - 2.2
  - 2.5
  - ruby-head
sudo: false
notifications:
  disable: true

--- liquid-c-4.0.0/Gemfile ---

source 'https://rubygems.org'
gemspec

gem 'liquid', github: 'Shopify/liquid', branch: 'master'

group :test do
  gem 'spy', '0.4.1'
  gem 'benchmark-ips'
end

group :development do
  gem 'byebug'
end

--- liquid-c-4.0.0/LICENSE.txt ---

Copyright (c) 2014 Shopify

MIT License

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

--- liquid-c-4.0.0/README.md ---

# Liquid::C

[![Build Status](https://travis-ci.org/Shopify/liquid-c.svg?branch=master)](https://travis-ci.org/Shopify/liquid-c)

Partial native implementation of the liquid ruby gem in C.

## Installation

Add these lines to your application's Gemfile:

    gem 'liquid', github: 'Shopify/liquid', branch: 'master'
    gem 'liquid-c', github: 'Shopify/liquid-c', branch: 'master'

And then execute:

    $ bundle

## Usage

    require 'liquid/c'

then just use the documented API for the liquid Gem.

## Restrictions

* Input strings are assumed to be UTF-8 encoded strings
* Tag#parse(tokens) is given a Liquid::Tokenizer object, instead of an array of strings, which only implements the shift method to get the next token.

## Performance

To compare Liquid-C's performance with plain Liquid run

    bundle exec rake compare:run

The latest benchmark results are shown below:

                     user     system      total        real
    Liquid:      0.000000   0.000000 246.950000 (247.499526)
    Liquid-C:    0.000000   0.010000 224.270000 (224.794395)

    Ratio: 90.82619215891624%

## Developing

    bundle install

    # run tests
    bundle exec rake

## Contributing

1. Fork it ( http://github.com/Shopify/liquid-c/fork )
2. Create your feature branch (`git checkout -b my-new-feature`)
3. Commit your changes (`git commit -am 'Add some feature'`)
4. Push to the branch (`git push origin my-new-feature`)
5.
Create new Pull Request liquid-c-4.0.0/Rakefile000066400000000000000000000032171337236435600147060ustar00rootroot00000000000000require 'rake' require 'rake/testtask' require 'bundler/gem_tasks' require 'rake/extensiontask' require 'benchmark' ENV['DEBUG'] = 'true' Rake::ExtensionTask.new("liquid_c") task :default => :test task :test => ['test:unit', 'test:liquid'] namespace :test do Rake::TestTask.new(:unit => :compile) do |t| t.libs << 'lib' << 'test' t.test_files = FileList['test/unit/**/*_test.rb'] end desc 'run test suite with default parser' Rake::TestTask.new(:base_liquid => :compile) do |t| t.libs << 'lib' t.test_files = ['test/liquid_test.rb'] end desc 'runs test suite with both strict and lax parsers' task :liquid do ENV['LIQUID_PARSER_MODE'] = 'lax' Rake::Task['test:base_liquid'].invoke ENV['LIQUID_PARSER_MODE'] = 'strict' Rake::Task['test:base_liquid'].reenable Rake::Task['test:base_liquid'].invoke end end namespace :benchmark do desc "Run the liquid benchmark with lax parsing" task :run do ruby "./performance.rb c benchmark lax" end desc "Run the liquid benchmark with strict parsing" task :strict do ruby "./performance.rb c benchmark strict" end end namespace :profile do desc "Run the liquid profile/performance coverage" task :run do ruby "./performance.rb c profile lax" end desc "Run the liquid profile/performance coverage with strict parsing" task :strict do ruby "./performance.rb c profile strict" end end namespace :compare do %w(lax warn strict).each do |type| desc "Compare Liquid to Liquid-C in #{type} mode" task type.to_sym do ruby "./performance.rb bare benchmark #{type}" ruby "./performance.rb c benchmark #{type}" end end end liquid-c-4.0.0/ext/000077500000000000000000000000001337236435600140365ustar00rootroot00000000000000liquid-c-4.0.0/ext/liquid_c/000077500000000000000000000000001337236435600156275ustar00rootroot00000000000000liquid-c-4.0.0/ext/liquid_c/block.c000066400000000000000000000117631337236435600170750ustar00rootroot00000000000000#include "liquid.h" #include "tokenizer.h" #include static ID intern_raise_missing_variable_terminator, intern_raise_missing_tag_terminator, intern_nodelist, intern_blank, intern_is_blank, intern_clear, intern_registered_tags, intern_parse, intern_square_brackets, intern_set_line_number; static int is_id(int c) { return rb_isalnum(c) || c == '_'; } inline static const char *read_while(const char *start, const char *end, int (func)(int)) { while (start < end && func((unsigned char) *start)) start++; return start; } inline static const char *read_while_end(const char *start, const char *end, int (func)(int)) { end--; while (start < end && func((unsigned char) *end)) end--; end++; return end; } static VALUE rb_block_parse(VALUE self, VALUE tokens, VALUE options) { tokenizer_t *tokenizer; Tokenizer_Get_Struct(tokens, tokenizer); token_t token; VALUE tags = Qnil; VALUE nodelist = rb_ivar_get(self, intern_nodelist); while (true) { if (tokenizer->line_number != 0) { rb_funcall(options, intern_set_line_number, 1, UINT2NUM(tokenizer->line_number)); } tokenizer_next(tokenizer, &token); switch (token.type) { case TOKENIZER_TOKEN_NONE: return rb_yield_values(2, Qnil, Qnil); case TOKEN_INVALID: { VALUE str = rb_enc_str_new(token.str, token.length, utf8_encoding); ID raise_method_id = intern_raise_missing_variable_terminator; if (token.str[1] == '%') raise_method_id = intern_raise_missing_tag_terminator; return rb_funcall(self, raise_method_id, 2, str, options); } case TOKEN_RAW: { const char *start = token.str, *end = token.str + token.length, 
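/* token_start/token_end are narrowed below: lstrip trims leading whitespace when
   the preceding tag ended with -%} or -}}, and rstrip trims trailing whitespace
   when the following tag starts with {%- or {{-. */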
*token_start = start, *token_end = end; if(token.lstrip) token_start = read_while(start, end, rb_isspace); if(token.rstrip) token_end = read_while_end(token_start, end, rb_isspace); VALUE str = rb_enc_str_new(token_start, token_end - token_start, utf8_encoding); rb_ary_push(nodelist, str); if (rb_ivar_get(self, intern_blank) == Qtrue) { const char *end = token.str + token.length; if (read_while(token.str, end, rb_isspace) < end) rb_ivar_set(self, intern_blank, Qfalse); } break; } case TOKEN_VARIABLE: { VALUE args[2] = {rb_enc_str_new(token.str + 2 + token.lstrip, token.length - 4 - token.lstrip - token.rstrip, utf8_encoding), options}; VALUE var = rb_class_new_instance(2, args, cLiquidVariable); rb_ary_push(nodelist, var); rb_ivar_set(self, intern_blank, Qfalse); break; } case TOKEN_TAG: { const char *start = token.str + 2 + token.lstrip, *end = token.str + token.length - 2 - token.rstrip; // Imitate \s*(\w+)\s*(.*)? regex const char *name_start = read_while(start, end, rb_isspace); const char *name_end = read_while(name_start, end, is_id); VALUE tag_name = rb_enc_str_new(name_start, name_end - name_start, utf8_encoding); if (tags == Qnil) tags = rb_funcall(self, intern_registered_tags, 0); VALUE tag_class = rb_funcall(tags, intern_square_brackets, 1, tag_name); const char *markup_start = read_while(name_end, end, rb_isspace); VALUE markup = rb_enc_str_new(markup_start, end - markup_start, utf8_encoding); if (tag_class == Qnil) return rb_yield_values(2, tag_name, markup); VALUE new_tag = rb_funcall(tag_class, intern_parse, 4, tag_name, markup, tokens, options); if (rb_ivar_get(self, intern_blank) == Qtrue && !RTEST(rb_funcall(new_tag, intern_is_blank, 0))) rb_ivar_set(self, intern_blank, Qfalse); rb_ary_push(nodelist, new_tag); break; } } } return Qnil; } void init_liquid_block() { intern_raise_missing_variable_terminator = rb_intern("raise_missing_variable_terminator"); intern_raise_missing_tag_terminator = rb_intern("raise_missing_tag_terminator"); intern_nodelist = rb_intern("@nodelist"); intern_blank = rb_intern("@blank"); intern_is_blank = rb_intern("blank?"); intern_clear = rb_intern("clear"); intern_registered_tags = rb_intern("registered_tags"); intern_parse = rb_intern("parse"); intern_square_brackets = rb_intern("[]"); intern_set_line_number = rb_intern("line_number="); VALUE cLiquidBlockBody = rb_const_get(mLiquid, rb_intern("BlockBody")); rb_define_method(cLiquidBlockBody, "c_parse", rb_block_parse, 2); } liquid-c-4.0.0/ext/liquid_c/block.h000066400000000000000000000001301337236435600170640ustar00rootroot00000000000000#if !defined(LIQUID_BLOCK_H) #define LIQUID_BLOCK_H void init_liquid_block(); #endif liquid-c-4.0.0/ext/liquid_c/extconf.rb000066400000000000000000000005141337236435600176220ustar00rootroot00000000000000require 'mkmf' $CFLAGS << ' -Wall -Werror -Wextra -Wno-unused-parameter -Wno-missing-field-initializers' compiler = RbConfig::MAKEFILE_CONFIG['CC'] if ENV['DEBUG'] == 'true' && compiler =~ /gcc|g\+\+/ $CFLAGS << ' -fbounds-check' end $warnflags.gsub!(/-Wdeclaration-after-statement/, "") if $warnflags create_makefile("liquid_c") liquid-c-4.0.0/ext/liquid_c/lexer.c000066400000000000000000000102121337236435600171060ustar00rootroot00000000000000#include "liquid.h" #include "lexer.h" #include const char *symbol_names[TOKEN_END] = { [TOKEN_NONE] = "none", [TOKEN_COMPARISON] = "comparison", [TOKEN_STRING] = "string", [TOKEN_NUMBER] = "number", [TOKEN_IDENTIFIER] = "id", [TOKEN_DOTDOT] = "dotdot", [TOKEN_EOS] = "end_of_string", [TOKEN_PIPE] = "pipe", [TOKEN_DOT] = "dot", 
[TOKEN_COLON] = "colon", [TOKEN_COMMA] = "comma", [TOKEN_OPEN_SQUARE] = "open_square", [TOKEN_CLOSE_SQUARE] = "close_square", [TOKEN_OPEN_ROUND] = "open_round", [TOKEN_CLOSE_ROUND] = "close_round", [TOKEN_QUESTION] = "question", [TOKEN_DASH] = "dash" }; inline static int is_identifier(char c) { return ISALNUM(c) || c == '_' || c == '-'; } inline static int is_special(char c) { switch (c) { case '|': case '.': case ':': case ',': case '[': case ']': case '(': case ')': case '?': case '-': return 1; } return 0; } // Returns a pointer to the character after the end of the match. inline static const char *prefix_end(const char *cur, const char *end, const char *pattern) { size_t pattern_len = strlen(pattern); if (pattern_len > (size_t)(end - cur)) return NULL; if (memcmp(cur, pattern, pattern_len) != 0) return NULL; return cur + pattern_len; } inline static const char *scan_past(const char *cur, const char *end, char target) { const char *match = memchr(cur + 1, target, end - cur - 1); return match ? match + 1 : NULL; } #define RETURN_TOKEN(t, n) { \ const char *tok_end = str + (n); \ token->type = (t); \ token->val = str; \ if (str != start) token->flags |= TOKEN_SPACE_PREFIX; \ if (tok_end < end && ISSPACE(*tok_end)) token->flags |= TOKEN_SPACE_SUFFIX; \ return (token->val_end = tok_end); \ } // Reads one token from start, and fills it into the token argument. // Returns the start of the next token if any, otherwise the end of the string. const char *lex_one(const char *start, const char *end, lexer_token_t *token) { // str references the start of the token, after whitespace is skipped. // cur references the currently processing character during iterative lexing. const char *str = start, *cur; while (str < end && ISSPACE(*str)) ++str; token->val = token->val_end = NULL; token->flags = 0; if (str >= end) return str; char c = *str; // First character of the token. char cn = '\0'; // Second character if available, for lookahead. if (str + 1 < end) cn = str[1]; switch (c) { case '<': RETURN_TOKEN(TOKEN_COMPARISON, cn == '>' || cn == '=' ? 2 : 1); case '>': RETURN_TOKEN(TOKEN_COMPARISON, cn == '=' ? 2 : 1); case '=': case '!': if (cn == '=') RETURN_TOKEN(TOKEN_COMPARISON, 2); break; case '.': if (cn == '.') RETURN_TOKEN(TOKEN_DOTDOT, 2); break; } if ((cur = prefix_end(str, end, "contains"))) RETURN_TOKEN(TOKEN_COMPARISON, cur - str); if (c == '\'' || c == '"') { cur = scan_past(str, end, c); if (cur) { // Quote was properly terminated. RETURN_TOKEN(TOKEN_STRING, cur - str); } } if (ISDIGIT(c) || c == '-') { int has_dot = 0; cur = str; while (++cur < end) { if (!has_dot && *cur == '.') { has_dot = 1; } else if (!ISDIGIT(*cur)) { break; } } cur--; // Point to last digit (or dot). if (*cur == '.') { cur--; // Ignore any trailing dot. 
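/* Leaving the trailing '.' unconsumed matters for ranges: "1..5" lexes as
   NUMBER(1), DOTDOT, NUMBER(5), and has_dot is reset so the token is not
   mistakenly flagged as a float. */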
has_dot = 0; } if (*cur != '-') { if (has_dot) token->flags |= TOKEN_FLOAT_NUMBER; RETURN_TOKEN(TOKEN_NUMBER, cur + 1 - str); } } if (ISALPHA(c) || c == '_') { cur = str; while (++cur < end && is_identifier(*cur)) {} if (cur < end && *cur == '?') cur++; RETURN_TOKEN(TOKEN_IDENTIFIER, cur - str); } if (is_special(c)) RETURN_TOKEN(c, 1); rb_enc_raise(utf8_encoding, cLiquidSyntaxError, "Unexpected character %c", c); return NULL; } #undef RETURN_TOKEN liquid-c-4.0.0/ext/liquid_c/lexer.h000066400000000000000000000020031337236435600171120ustar00rootroot00000000000000#if !defined(LIQUID_LEXER_H) #define LIQUID_LEXER_H enum lexer_token_type { TOKEN_NONE, TOKEN_COMPARISON, TOKEN_STRING, TOKEN_NUMBER, TOKEN_IDENTIFIER, TOKEN_DOTDOT, TOKEN_EOS, TOKEN_PIPE = '|', TOKEN_DOT = '.', TOKEN_COLON = ':', TOKEN_COMMA = ',', TOKEN_OPEN_SQUARE = '[', TOKEN_CLOSE_SQUARE = ']', TOKEN_OPEN_ROUND = '(', TOKEN_CLOSE_ROUND = ')', TOKEN_QUESTION = '?', TOKEN_DASH = '-', TOKEN_END = 256 }; #define TOKEN_SPACE_PREFIX 0x1 #define TOKEN_SPACE_SUFFIX 0x2 #define TOKEN_SPACE_AFFIX (TOKEN_SPACE_PREFIX | TOKEN_SPACE_SUFFIX) #define TOKEN_FLOAT_NUMBER 0x4 typedef struct lexer_token { unsigned char type, flags; const char *val, *val_end; } lexer_token_t; extern const char *symbol_names[TOKEN_END]; const char *lex_one(const char *str, const char *end, lexer_token_t *token); inline static VALUE token_to_rstr(lexer_token_t token) { return rb_enc_str_new(token.val, token.val_end - token.val, utf8_encoding); } #endif liquid-c-4.0.0/ext/liquid_c/liquid.c000066400000000000000000000013141337236435600172610ustar00rootroot00000000000000#include "liquid.h" #include "tokenizer.h" #include "variable.h" #include "lexer.h" #include "parser.h" #include "block.h" VALUE mLiquid, mLiquidC, cLiquidSyntaxError, cLiquidVariable, cLiquidTemplate; rb_encoding *utf8_encoding; void Init_liquid_c(void) { utf8_encoding = rb_utf8_encoding(); mLiquid = rb_define_module("Liquid"); mLiquidC = rb_define_module_under(mLiquid, "C"); cLiquidSyntaxError = rb_const_get(mLiquid, rb_intern("SyntaxError")); cLiquidVariable = rb_const_get(mLiquid, rb_intern("Variable")); cLiquidTemplate = rb_const_get(mLiquid, rb_intern("Template")); init_liquid_tokenizer(); init_liquid_parser(); init_liquid_variable(); init_liquid_block(); } liquid-c-4.0.0/ext/liquid_c/liquid.h000066400000000000000000000003561337236435600172730ustar00rootroot00000000000000#if !defined(LIQUID_H) #define LIQUID_H #include #include #include extern VALUE mLiquid, mLiquidC, cLiquidSyntaxError, cLiquidVariable, cLiquidTemplate; extern rb_encoding *utf8_encoding; #endif liquid-c-4.0.0/ext/liquid_c/parser.c000066400000000000000000000131011337236435600172630ustar00rootroot00000000000000#include "liquid.h" #include "parser.h" #include "lexer.h" static VALUE cLiquidRangeLookup, cLiquidVariableLookup, cRange, vLiquidExpressionLiterals; static ID idToI, idEvaluate; void init_parser(parser_t *p, const char *str, const char *end) { p->str_end = end; p->cur.type = p->next.type = TOKEN_EOS; p->str = lex_one(str, end, &p->cur); p->str = lex_one(p->str, end, &p->next); } lexer_token_t parser_consume_any(parser_t *p) { lexer_token_t cur = p->cur; p->cur = p->next; p->next.type = TOKEN_EOS; p->str = lex_one(p->str, p->str_end, &p->next); return cur; } lexer_token_t parser_must_consume(parser_t *p, unsigned char type) { if (p->cur.type != type) { rb_enc_raise(utf8_encoding, cLiquidSyntaxError, "Expected %s but found %s", symbol_names[type], symbol_names[p->cur.type]); } return parser_consume_any(p); } lexer_token_t 
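/* Non-raising counterpart of parser_must_consume: on a type mismatch it returns a
   zero-initialized token (type TOKEN_NONE) instead of raising, so callers can just
   test the .type field of the result. */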
parser_consume(parser_t *p, unsigned char type) { if (p->cur.type != type) { lexer_token_t zero = {0}; return zero; } return parser_consume_any(p); } inline static int rstring_eq(VALUE rstr, const char *str) { size_t str_len = strlen(str); return TYPE(rstr) == T_STRING && str_len == (size_t)RSTRING_LEN(rstr) && memcmp(RSTRING_PTR(rstr), str, str_len) == 0; } static VALUE parse_number(parser_t *p) { VALUE out; lexer_token_t token = parser_must_consume(p, TOKEN_NUMBER); // Set up sentinel for rb_cstr operations. char tmp = *token.val_end; *(char *)token.val_end = '\0'; if (token.flags & TOKEN_FLOAT_NUMBER) { out = DBL2NUM(rb_cstr_to_dbl(token.val, 1)); } else { out = rb_cstr_to_inum(token.val, 10, 1); } *(char *)token.val_end = tmp; return out; } static VALUE parse_range(parser_t *p) { parser_must_consume(p, TOKEN_OPEN_ROUND); VALUE args[2]; args[0] = parse_expression(p); parser_must_consume(p, TOKEN_DOTDOT); args[1] = parse_expression(p); parser_must_consume(p, TOKEN_CLOSE_ROUND); if (rb_respond_to(args[0], idEvaluate) || rb_respond_to(args[1], idEvaluate)) return rb_class_new_instance(2, args, cLiquidRangeLookup); return rb_class_new_instance(2, args, cRange); } static VALUE parse_variable(parser_t *p) { VALUE name, lookups = rb_ary_new(), lookup; unsigned long long command_flags = 0; if (parser_consume(p, TOKEN_OPEN_SQUARE).type) { name = parse_expression(p); parser_must_consume(p, TOKEN_CLOSE_SQUARE); } else { name = token_to_rstr(parser_must_consume(p, TOKEN_IDENTIFIER)); } while (true) { if (p->cur.type == TOKEN_OPEN_SQUARE) { parser_consume_any(p); lookup = parse_expression(p); parser_must_consume(p, TOKEN_CLOSE_SQUARE); rb_ary_push(lookups, lookup); } else if (p->cur.type == TOKEN_DOT) { int has_space_affix = parser_consume_any(p).flags & TOKEN_SPACE_AFFIX; lookup = token_to_rstr(parser_must_consume(p, TOKEN_IDENTIFIER)); if (has_space_affix) rb_enc_raise(utf8_encoding, cLiquidSyntaxError, "Unexpected dot"); if (rstring_eq(lookup, "size") || rstring_eq(lookup, "first") || rstring_eq(lookup, "last")) command_flags |= 1 << RARRAY_LEN(lookups); rb_ary_push(lookups, lookup); } else { break; } } if (RARRAY_LEN(lookups) == 0) { VALUE literal = rb_hash_lookup2(vLiquidExpressionLiterals, name, Qundef); if (literal != Qundef) return literal; } VALUE args[4] = {Qfalse, name, lookups, INT2FIX(command_flags)}; return rb_class_new_instance(4, args, cLiquidVariableLookup); } VALUE parse_expression(parser_t *p) { switch (p->cur.type) { case TOKEN_IDENTIFIER: case TOKEN_OPEN_SQUARE: return parse_variable(p); case TOKEN_NUMBER: return parse_number(p); case TOKEN_OPEN_ROUND: return parse_range(p); case TOKEN_STRING: { lexer_token_t token = parser_consume_any(p); token.val++; token.val_end--; return token_to_rstr(token); } } if (p->cur.type == TOKEN_EOS) { rb_enc_raise(utf8_encoding, cLiquidSyntaxError, "[:%s] is not a valid expression", symbol_names[p->cur.type]); } else { rb_enc_raise(utf8_encoding, cLiquidSyntaxError, "[:%s, \"%.*s\"] is not a valid expression", symbol_names[p->cur.type], (int)(p->cur.val_end - p->cur.val), p->cur.val); } return Qnil; } static VALUE rb_parse_expression(VALUE self, VALUE markup) { StringValue(markup); char *start = RSTRING_PTR(markup); parser_t p; init_parser(&p, start, start + RSTRING_LEN(markup)); if (p.cur.type == TOKEN_EOS) return Qnil; VALUE expr = parse_expression(&p); if (p.cur.type != TOKEN_EOS) rb_enc_raise(utf8_encoding, cLiquidSyntaxError, "[:%s] is not a valid expression", symbol_names[p.cur.type]); return expr; } void init_liquid_parser(void) { idToI = 
rb_intern("to_i"); idEvaluate = rb_intern("evaluate"); cLiquidRangeLookup = rb_const_get(mLiquid, rb_intern("RangeLookup")); cRange = rb_const_get(rb_cObject, rb_intern("Range")); cLiquidVariableLookup = rb_const_get(mLiquid, rb_intern("VariableLookup")); VALUE cLiquidExpression = rb_const_get(mLiquid, rb_intern("Expression")); rb_define_singleton_method(cLiquidExpression, "c_parse", rb_parse_expression, 1); vLiquidExpressionLiterals = rb_const_get(cLiquidExpression, rb_intern("LITERALS")); } liquid-c-4.0.0/ext/liquid_c/parser.h000066400000000000000000000010101337236435600172640ustar00rootroot00000000000000#if !defined(LIQUID_PARSER_H) #define LIQUID_PARSER_H #include "lexer.h" typedef struct parser { lexer_token_t cur, next; const char *str, *str_end; } parser_t; void init_parser(parser_t *parser, const char *str, const char *end); lexer_token_t parser_must_consume(parser_t *parser, unsigned char type); lexer_token_t parser_consume(parser_t *parser, unsigned char type); lexer_token_t parser_consume_any(parser_t *parser); VALUE parse_expression(parser_t *parser); void init_liquid_parser(void); #endif liquid-c-4.0.0/ext/liquid_c/tokenizer.c000066400000000000000000000121761337236435600200140ustar00rootroot00000000000000#include "liquid.h" #include "tokenizer.h" VALUE cLiquidTokenizer; static void tokenizer_mark(void *ptr) { tokenizer_t *tokenizer = ptr; rb_gc_mark(tokenizer->source); } static void tokenizer_free(void *ptr) { tokenizer_t *tokenizer = ptr; xfree(tokenizer); } static size_t tokenizer_memsize(const void *ptr) { return ptr ? sizeof(tokenizer_t) : 0; } const rb_data_type_t tokenizer_data_type = { "liquid_tokenizer", { tokenizer_mark, tokenizer_free, tokenizer_memsize, }, #if defined(RUBY_TYPED_FREE_IMMEDIATELY) NULL, NULL, RUBY_TYPED_FREE_IMMEDIATELY #endif }; static VALUE tokenizer_allocate(VALUE klass) { VALUE obj; tokenizer_t *tokenizer; obj = TypedData_Make_Struct(klass, tokenizer_t, &tokenizer_data_type, tokenizer); tokenizer->source = Qnil; return obj; } static VALUE tokenizer_initialize_method(VALUE self, VALUE source, VALUE line_numbers) { tokenizer_t *tokenizer; Check_Type(source, T_STRING); Tokenizer_Get_Struct(self, tokenizer); source = rb_str_dup_frozen(source); tokenizer->source = source; tokenizer->cursor = RSTRING_PTR(source); tokenizer->length = RSTRING_LEN(source); tokenizer->lstrip_flag = 0; // tokenizer->line_number keeps track of the current line number or it is 0 // to indicate that line numbers aren't being calculated tokenizer->line_number = RTEST(line_numbers) ? 
1 : 0; return Qnil; } void tokenizer_next(tokenizer_t *tokenizer, token_t *token) { if (tokenizer->length <= 0) { memset(token, 0, sizeof(*token)); return; } const char *cursor = tokenizer->cursor; const char *last = cursor + tokenizer->length - 1; token->str = cursor; token->type = TOKEN_RAW; token->lstrip = 0; token->rstrip = 0; while (cursor < last) { if (*cursor++ != '{') continue; char c = *cursor++; if (c != '%' && c != '{') continue; if (cursor <= last && *cursor == '-') { cursor++; token->rstrip = 1; } if (cursor - tokenizer->cursor > (ptrdiff_t)(2 + token->rstrip)) { token->type = TOKEN_RAW; cursor -= 2 + token->rstrip; token->lstrip = tokenizer->lstrip_flag; tokenizer->lstrip_flag = 0; goto found; } tokenizer->lstrip_flag = 0; token->type = TOKEN_INVALID; token->lstrip = token->rstrip; token->rstrip = 0; if (c == '%') { while (cursor < last) { if (*cursor++ != '%') continue; c = *cursor++; while (c == '%' && cursor <= last) c = *cursor++; if (c != '}') continue; token->type = TOKEN_TAG; if(cursor[-3] == '-') token->rstrip = tokenizer->lstrip_flag = 1; goto found; } // unterminated tag cursor = tokenizer->cursor + 2; tokenizer->lstrip_flag = 0; goto found; } else { while (cursor < last) { if (*cursor++ != '}') continue; if (*cursor++ != '}') { // variable incomplete end, used to end raw tags cursor--; goto found; } token->type = TOKEN_VARIABLE; if(cursor[-3] == '-') token->rstrip = tokenizer->lstrip_flag = 1; goto found; } // unterminated variable cursor = tokenizer->cursor + 2; tokenizer->lstrip_flag = 0; goto found; } } cursor = last + 1; token->lstrip = tokenizer->lstrip_flag; tokenizer->lstrip_flag = 0; found: token->length = cursor - tokenizer->cursor; tokenizer->cursor += token->length; tokenizer->length -= token->length; if (tokenizer->line_number) { const char *cursor = token->str; const char *end = token->str + token->length; while (cursor < end) { if (*cursor == '\n') tokenizer->line_number++; cursor++; } } } static VALUE tokenizer_shift_method(VALUE self) { tokenizer_t *tokenizer; Tokenizer_Get_Struct(self, tokenizer); token_t token; tokenizer_next(tokenizer, &token); if (!token.type) return Qnil; return rb_enc_str_new(token.str, token.length, utf8_encoding); } static VALUE tokenizer_line_number_method(VALUE self) { tokenizer_t *tokenizer; Tokenizer_Get_Struct(self, tokenizer); if (tokenizer->line_number == 0) return Qnil; return UINT2NUM(tokenizer->line_number); } void init_liquid_tokenizer() { cLiquidTokenizer = rb_define_class_under(mLiquidC, "Tokenizer", rb_cObject); rb_define_alloc_func(cLiquidTokenizer, tokenizer_allocate); rb_define_method(cLiquidTokenizer, "initialize", tokenizer_initialize_method, 2); rb_define_method(cLiquidTokenizer, "shift", tokenizer_shift_method, 0); rb_define_method(cLiquidTokenizer, "line_number", tokenizer_line_number_method, 0); } liquid-c-4.0.0/ext/liquid_c/tokenizer.h000066400000000000000000000014141337236435600200120ustar00rootroot00000000000000#if !defined(LIQUID_TOKENIZER_H) #define LIQUID_TOKENIZER_H enum token_type { TOKENIZER_TOKEN_NONE = 0, TOKEN_INVALID, TOKEN_RAW, TOKEN_TAG, TOKEN_VARIABLE }; typedef struct token { enum token_type type; const char *str; long length; unsigned int lstrip; unsigned int rstrip; } token_t; typedef struct tokenizer { VALUE source; const char *cursor; long length; unsigned int line_number; unsigned int lstrip_flag; } tokenizer_t; extern VALUE cLiquidTokenizer; extern const rb_data_type_t tokenizer_data_type; #define Tokenizer_Get_Struct(obj, sval) TypedData_Get_Struct(obj, tokenizer_t, 
&tokenizer_data_type, sval) void init_liquid_tokenizer(); void tokenizer_next(tokenizer_t *tokenizer, token_t *token); #endif liquid-c-4.0.0/ext/liquid_c/variable.c000066400000000000000000000031721337236435600175630ustar00rootroot00000000000000#include "liquid.h" #include "variable.h" #include "parser.h" #include static VALUE rb_variable_parse(VALUE self, VALUE markup, VALUE filters) { StringValue(markup); char *start = RSTRING_PTR(markup); parser_t p; init_parser(&p, start, start + RSTRING_LEN(markup)); if (p.cur.type == TOKEN_EOS) return Qnil; VALUE name = parse_expression(&p); while (parser_consume(&p, TOKEN_PIPE).type) { lexer_token_t filter_name = parser_must_consume(&p, TOKEN_IDENTIFIER); VALUE filter_args = rb_ary_new(), keyword_args = Qnil, filter; if (parser_consume(&p, TOKEN_COLON).type) { do { if (p.cur.type == TOKEN_IDENTIFIER && p.next.type == TOKEN_COLON) { VALUE key = token_to_rstr(parser_consume_any(&p)); parser_consume_any(&p); if (keyword_args == Qnil) keyword_args = rb_hash_new(); rb_hash_aset(keyword_args, key, parse_expression(&p)); } else { rb_ary_push(filter_args, parse_expression(&p)); } } while (parser_consume(&p, TOKEN_COMMA).type); } if (keyword_args == Qnil) { filter = rb_ary_new3(2, token_to_rstr(filter_name), filter_args); } else { filter = rb_ary_new3(3, token_to_rstr(filter_name), filter_args, keyword_args); } rb_ary_push(filters, filter); } parser_must_consume(&p, TOKEN_EOS); return name; } void init_liquid_variable(void) { rb_define_singleton_method(cLiquidVariable, "c_strict_parse", rb_variable_parse, 2); } liquid-c-4.0.0/ext/liquid_c/variable.h000066400000000000000000000001451337236435600175650ustar00rootroot00000000000000#if !defined(LIQUID_VARIABLE_H) #define LIQUID_VARIABLE_H void init_liquid_variable(void); #endif liquid-c-4.0.0/lib/000077500000000000000000000000001337236435600140045ustar00rootroot00000000000000liquid-c-4.0.0/lib/liquid/000077500000000000000000000000001337236435600152735ustar00rootroot00000000000000liquid-c-4.0.0/lib/liquid/c.rb000066400000000000000000000036071337236435600160500ustar00rootroot00000000000000require 'liquid/c/version' require 'liquid' require 'liquid_c' module Liquid module C @enabled = true class << self attr_accessor :enabled end end end Liquid::Tokenizer.class_eval do def self.new(source, line_numbers = false) if Liquid::C.enabled Liquid::C::Tokenizer.new(source.to_s, line_numbers) else super end end end Liquid::BlockBody.class_eval do alias_method :ruby_parse, :parse def parse(tokens, options) if Liquid::C.enabled && !options[:profile] c_parse(tokens, options) { |t, m| yield t, m } else ruby_parse(tokens, options) { |t, m| yield t, m } end end end Liquid::Variable.class_eval do alias_method :ruby_lax_parse, :lax_parse alias_method :ruby_strict_parse, :strict_parse def lax_parse(markup) stats = options[:stats_callbacks] stats[:variable_parse].call if stats if Liquid::C.enabled begin return strict_parse(markup) rescue Liquid::SyntaxError stats[:variable_fallback].call if stats end end ruby_lax_parse(markup) end def strict_parse(markup) if Liquid::C.enabled @name = Liquid::Variable.c_strict_parse(markup, @filters = []) else ruby_strict_parse(markup) end end end Liquid::VariableLookup.class_eval do alias_method :ruby_initialize, :initialize def initialize(markup, name = nil, lookups = nil, command_flags = nil) if Liquid::C.enabled && markup == false @name = name @lookups = lookups @command_flags = command_flags else ruby_initialize(markup) end end end Liquid::Expression.class_eval do class << self alias_method 
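# The Expression.parse override below tries the C expression parser first and
# falls back to the pure-Ruby parser whenever the C parser raises Liquid::SyntaxError.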
:ruby_parse, :parse def parse(markup) return nil unless markup if Liquid::C.enabled begin return c_parse(markup) rescue Liquid::SyntaxError end end ruby_parse(markup) end end end liquid-c-4.0.0/lib/liquid/c/000077500000000000000000000000001337236435600155155ustar00rootroot00000000000000liquid-c-4.0.0/lib/liquid/c/version.rb000066400000000000000000000000711337236435600175250ustar00rootroot00000000000000module Liquid module C VERSION = "4.0.0" end end liquid-c-4.0.0/liquid-c.gemspec000066400000000000000000000021541337236435600163140ustar00rootroot00000000000000# coding: utf-8 lib = File.expand_path('../lib', __FILE__) $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib) require 'liquid/c/version' Gem::Specification.new do |spec| spec.name = "liquid-c" spec.version = Liquid::C::VERSION spec.authors = ["Justin Li", "Dylan Thacker-Smith"] spec.email = ["gems@shopify.com"] spec.summary = "Liquid performance extension in C" spec.homepage = "https://github.com/shopify/liquid-c" spec.license = "MIT" spec.extensions = ['ext/liquid_c/extconf.rb'] spec.files = `git ls-files -z`.split("\x0") spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) } spec.test_files = spec.files.grep(%r{^(test|spec|features)/}) spec.require_paths = ["lib"] spec.add_dependency 'liquid', '>= 3.0.0' spec.add_development_dependency "bundler", "~> 1.5" spec.add_development_dependency "rake" spec.add_development_dependency 'rake-compiler' spec.add_development_dependency 'minitest' spec.add_development_dependency 'stackprof' if Gem::Version.new(RUBY_VERSION) >= Gem::Version.new("2.1.0") end liquid-c-4.0.0/performance.rb000066400000000000000000000004241337236435600160640ustar00rootroot00000000000000require 'liquid' require 'liquid/c' if ARGV.shift == "c" liquid_lib_dir = $LOAD_PATH.detect{ |p| File.exists?(File.join(p, 'liquid.rb')) } script = ARGV.shift or abort("unspecified performance script") require File.join(File.dirname(liquid_lib_dir), "performance/#{script}") liquid-c-4.0.0/test/000077500000000000000000000000001337236435600142155ustar00rootroot00000000000000liquid-c-4.0.0/test/liquid_test.rb000066400000000000000000000005421337236435600170710ustar00rootroot00000000000000liquid_lib_dir = $LOAD_PATH.detect{ |p| File.exists?(File.join(p, 'liquid.rb')) } liquid_test_dir = File.join(File.dirname(liquid_lib_dir), 'test') $LOAD_PATH << liquid_test_dir require 'test_helper' require 'liquid/c' test_files = FileList[File.join(liquid_test_dir, 'integration/**/*_test.rb')] test_files.each do |test_file| require test_file end liquid-c-4.0.0/test/test_helper.rb000066400000000000000000000000561337236435600170610ustar00rootroot00000000000000require 'minitest/autorun' require 'liquid/c' liquid-c-4.0.0/test/unit/000077500000000000000000000000001337236435600151745ustar00rootroot00000000000000liquid-c-4.0.0/test/unit/tokenizer_test.rb000066400000000000000000000025351337236435600205770ustar00rootroot00000000000000# encoding: utf-8 require 'test_helper' class TokenizerTest < MiniTest::Unit::TestCase def test_tokenize_strings assert_equal [' '], tokenize(' ') assert_equal ['hello world'], tokenize('hello world') end def test_tokenize_variables assert_equal ['{{funk}}'], tokenize('{{funk}}') assert_equal [' ', '{{funk}}', ' '], tokenize(' {{funk}} ') assert_equal [' ', '{{funk}}', ' ', '{{so}}', ' ', '{{brother}}', ' '], tokenize(' {{funk}} {{so}} {{brother}} ') assert_equal [' ', '{{ funk }}', ' '], tokenize(' {{ funk }} ') end def test_tokenize_blocks assert_equal ['{%comment%}'], tokenize('{%comment%}') assert_equal [' ', 
'{%comment%}', ' '], tokenize(' {%comment%} ') assert_equal [' ', '{%comment%}', ' ', '{%endcomment%}', ' '], tokenize(' {%comment%} {%endcomment%} ') assert_equal [' ', '{% comment %}', ' ', '{% endcomment %}', ' '], tokenize(" {% comment %} {% endcomment %} ") end def test_utf8_encoded_template source = 'auswählen' assert_equal Encoding::UTF_8, source.encoding output = tokenize(source) assert_equal [Encoding::UTF_8], output.map(&:encoding) assert_equal [source], output end private def tokenize(source) tokenizer = Liquid::C::Tokenizer.new(source, false) tokens = [] while t = tokenizer.shift tokens << t end tokens end end liquid-c-4.0.0/test/unit/variable_test.rb000066400000000000000000000075241337236435600203550ustar00rootroot00000000000000# encoding: utf-8 require 'test_helper' class VariableTest < MiniTest::Unit::TestCase def test_variable_parse assert_equal [lookup('hello'), []], variable_parse('hello') assert_equal ['world', []], variable_parse(' "world" ') assert_equal [lookup('hello["world"]'), []], variable_parse(' hello["world"] ') assert_equal [nil, []], variable_parse('') assert_equal [lookup('question?'), []], variable_parse('question?') assert_equal [lookup('[meta]'), []], variable_parse('[meta]') assert_equal [lookup('a-b'), []], variable_parse('a-b') assert_equal [lookup('a-2'), []], variable_parse('a-2') end def test_strictness assert_raises(Liquid::SyntaxError) { variable_parse(' hello["world\']" ') } assert_raises(Liquid::SyntaxError) { variable_parse('-..') } assert_raises(Liquid::SyntaxError) { variable_parse('question?mark') } assert_raises(Liquid::SyntaxError) { variable_parse('123.foo') } assert_raises(Liquid::SyntaxError) { variable_parse(' | nothing') } ['a .b', 'a. b', 'a . b'].each do |var| assert_raises(Liquid::SyntaxError) { variable_parse(var) } end ['a -b', 'a- b', 'a - b'].each do |var| assert_raises(Liquid::SyntaxError) { variable_parse(var) } end end def test_literals assert_equal [true, []], variable_parse('true') assert_equal [nil, []], variable_parse('nil') assert_equal [123.4, []], variable_parse('123.4') assert_equal [lookup('[blank]'), []], variable_parse('[blank]') assert_equal [lookup(false, true, [Liquid::Expression::LITERALS['blank']], 0), []], variable_parse('[true][blank]') assert_equal [lookup('[true][blank]'), []], variable_parse('[true][blank]') assert_equal [lookup('x["size"]'), []], variable_parse('x["size"]') end def test_variable_filter name = lookup('name') assert_equal [name, [['filter', []]]], variable_parse(' name | filter ') assert_equal [name, [['filter1', []], ['filter2', []]]], variable_parse(' name | filter1 | filter2 ') end def test_variable_filter_args name = lookup('name') abc = lookup('abc') assert_equal [name, [['filter', [abc]]]], variable_parse(' name | filter: abc ') assert_equal [name, [['filter1', [abc]], ['filter2', [abc]]]], variable_parse(' name | filter1: abc | filter2: abc ') assert_equal [name, [['filter', [lookup('a')], {'b' => lookup('c'), 'd' => lookup('e')}]]], variable_parse('name | filter : a , b : c , d : e') assert_raises Liquid::SyntaxError do variable_parse('name | filter : a : b : c : d : e') end end def test_unicode_strings assert_equal ['å߀êùidhtлsԁѵ߀ráƙìstɦeƅêstpcmáѕterrãcêcհèrr', []], variable_parse('"å߀êùidhtлsԁѵ߀ráƙìstɦeƅêstpcmáѕterrãcêcհèrr"') end def test_broken_unicode_errors err = assert_raises(Liquid::SyntaxError) do Liquid::Template.parse("test {{ \xC2\xA0 test }}", error_mode: :strict) end assert err.message end def test_callbacks variable_parses = 0 variable_fallbacks = 0 callbacks = { 
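      # These hooks are the stats_callbacks consumed by the patched Variable#lax_parse in
      # lib/liquid/c.rb: the parse counter bumps on every variable, the fallback counter
      # only when the C strict parse raises SyntaxError.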
      variable_parse: lambda { variable_parses += 1 },
      variable_fallback: lambda { variable_fallbacks += 1 }
    }

    create_variable('abc', error_mode: :lax, stats_callbacks: callbacks)
    assert_equal 1, variable_parses
    assert_equal 0, variable_fallbacks

    create_variable('@!#', error_mode: :lax, stats_callbacks: callbacks)
    assert_equal 2, variable_parses
    assert_equal 1, variable_fallbacks
  end

  private

  def create_variable(markup, options={})
    Liquid::Variable.new(markup, Liquid::ParseContext.new(options))
  end

  def variable_parse(markup)
    name = Liquid::Variable.c_strict_parse(markup, filters = [])
    [name, filters]
  end

  def lookup(*args)
    Liquid::VariableLookup.new(*args)
  end
end
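For reference, a minimal usage sketch of the extension as described in the README's Usage section: requiring `liquid/c` patches the liquid gem so parsing goes through the C tokenizer and parsers, while the `Liquid::C.enabled` flag from lib/liquid/c.rb switches back to the pure-Ruby implementations. `Liquid::Template.parse`/`render` and the `upcase`/`plus` filters come from the liquid gem itself, not from this repo; the template source and values below are illustrative only.

    require 'liquid'
    require 'liquid/c'

    # While Liquid::C.enabled is true (the default), parsing uses the C extension.
    template = Liquid::Template.parse('Hello {{ name | upcase }}!', error_mode: :strict)
    puts template.render('name' => 'world')            # => "Hello WORLD!"

    # Flipping the flag falls back to the pure-Ruby implementation, e.g. for comparison.
    Liquid::C.enabled = false
    puts Liquid::Template.parse('{{ 1 | plus: 2 }}').render   # => "3"
    Liquid::C.enabled = true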