escape-utils-0.2.4/0000755000175000017500000000000011772525275013434 5ustar tfheentfheenescape-utils-0.2.4/spec/0000755000175000017500000000000011772525275014366 5ustar tfheentfheenescape-utils-0.2.4/spec/rcov.opts0000644000175000017500000000010111772525275016236 0ustar tfheentfheen--exclude spec,gem --text-summary --sort coverage --sort-reverse escape-utils-0.2.4/spec/uri/0000755000175000017500000000000011772525275015165 5ustar tfheentfheenescape-utils-0.2.4/spec/uri/escape_spec.rb0000644000175000017500000000307011772525275017764 0ustar tfheentfheenrequire File.expand_path(File.dirname(__FILE__) + '/../spec_helper.rb') require 'uri' describe EscapeUtils, "escape_uri" do it "should respond to escape_uri" do EscapeUtils.should respond_to(:escape_uri) end it "should escape each byte exactly like URI.escape" do (0..255).each do |i| c = i.chr EscapeUtils.escape_uri(c).should eql(URI.escape(c)) end end # NOTE: from Rack's test suite it "should escape a url containing tags" do EscapeUtils.escape_uri("fobar").should eql("fo%3Co%3Ebar") end # NOTE: from Rack's test suite it "should escape a url with spaces" do EscapeUtils.escape_uri("a space").should eql("a%20space") EscapeUtils.escape_uri("a sp ace ").should eql("a%20%20%20sp%20ace%20") end # NOTE: from Rack's test suite it "should escape correctly for multibyte characters" do matz_name = "\xE3\x81\xBE\xE3\x81\xA4\xE3\x82\x82\xE3\x81\xA8" # Matsumoto EscapeUtils.escape_uri(matz_name).should eql('%E3%81%BE%E3%81%A4%E3%82%82%E3%81%A8') matz_name_sep = "\xE3\x81\xBE\xE3\x81\xA4 \xE3\x82\x82\xE3\x81\xA8" # Matsu moto EscapeUtils.escape_uri(matz_name_sep).should eql('%E3%81%BE%E3%81%A4%20%E3%82%82%E3%81%A8') end if RUBY_VERSION =~ /^1.9/ it "return value should be in original string's encoding" do str = "http://www.homerun.com/".encode('us-ascii') EscapeUtils.escape_uri(str).encoding.should eql(Encoding.find('us-ascii')) str = "http://www.homerun.com/".encode('utf-8') EscapeUtils.escape_uri(str).encoding.should eql(Encoding.find('utf-8')) end end endescape-utils-0.2.4/spec/uri/unescape_spec.rb0000644000175000017500000000434111772525275020331 0ustar tfheentfheen# encoding: UTF-8 require File.expand_path(File.dirname(__FILE__) + '/../spec_helper.rb') describe EscapeUtils, "unescape_uri" do it "should respond to unescape_uri" do EscapeUtils.should respond_to(:unescape_uri) end it "should unescape a basic url" do EscapeUtils.unescape_uri("http%3A%2F%2Fwww.homerun.com%2F").should eql("http://www.homerun.com/") EscapeUtils.unescape_uri("http://www.homerun.com/").should eql("http://www.homerun.com/") end it "should not be thrown by a standalone %" do EscapeUtils.unescape_uri("%").should eql("%") end it "should not be thrown by a trailing %" do EscapeUtils.unescape_uri("http%").should eql("http%") end # NOTE: from Rack's test suite it "should unescape a url containing tags" do EscapeUtils.unescape_uri("fo%3Co%3Ebar").should eql("fobar") end # NOTE: from Rack's test suite it "should unescape a url with spaces" do EscapeUtils.unescape_uri("a%20space").should eql("a space") EscapeUtils.unescape_uri("a%20%20%20sp%20ace%20").should eql("a sp ace ") EscapeUtils.unescape_uri("a+space").should eql("a+space") end # NOTE: from Rack's test suite it "should unescape a string of mixed characters" do EscapeUtils.unescape_uri("q1%212%22%27w%245%267%2Fz8%29%3F%5C").should eql("q1!2\"'w$5&7/z8)?\\") EscapeUtils.unescape_uri("q1!2%22'w$5&7/z8)?%5C").should eql("q1!2\"'w$5&7/z8)?\\") end # NOTE: from Rack's test suite it "should unescape correctly for multibyte characters" do matz_name = "\xE3\x81\xBE\xE3\x81\xA4\xE3\x82\x82\xE3\x81\xA8" # Matsumoto EscapeUtils.unescape_uri('%E3%81%BE%E3%81%A4%E3%82%82%E3%81%A8').should eql(matz_name) matz_name_sep = "\xE3\x81\xBE\xE3\x81\xA4 \xE3\x82\x82\xE3\x81\xA8" # Matsu moto EscapeUtils.unescape_uri('%E3%81%BE%E3%81%A4%20%E3%82%82%E3%81%A8').should eql(matz_name_sep) end if RUBY_VERSION =~ /^1.9/ it "return value should be in original string's encoding" do str = "http%3A%2F%2Fwww.homerun.com%2F".encode('us-ascii') EscapeUtils.unescape_uri(str).encoding.should eql(Encoding.find('us-ascii')) str = "http%3A%2F%2Fwww.homerun.com%2F".encode('utf-8') EscapeUtils.unescape_uri(str).encoding.should eql(Encoding.find('utf-8')) end end endescape-utils-0.2.4/spec/url/0000755000175000017500000000000011772525275015170 5ustar tfheentfheenescape-utils-0.2.4/spec/url/escape_spec.rb0000644000175000017500000000361111772525275017770 0ustar tfheentfheenrequire File.expand_path(File.dirname(__FILE__) + '/../spec_helper.rb') require 'cgi' describe EscapeUtils, "escape_url" do it "should respond to escape_url" do EscapeUtils.should respond_to(:escape_url) end it "should escape a basic url" do EscapeUtils.escape_url("http://www.homerun.com/").should eql("http%3A%2F%2Fwww.homerun.com%2F") end it "should escape each possible byte value exactly like CGI.escape" do (0..255).each do |i| c = i.chr EscapeUtils.escape_url(c).should eql(CGI.escape(c)) end end # NOTE: from Rack's test suite it "should escape a url containing tags" do EscapeUtils.escape_url("fobar").should eql("fo%3Co%3Ebar") end # NOTE: from Rack's test suite it "should escape a url with spaces" do EscapeUtils.escape_url("a space").should eql("a+space") EscapeUtils.escape_url("a sp ace ").should eql("a+++sp+ace+") end # NOTE: from Rack's test suite it "should escape a string of mixed characters" do EscapeUtils.escape_url("q1!2\"'w$5&7/z8)?\\").should eql("q1%212%22%27w%245%267%2Fz8%29%3F%5C") end # NOTE: from Rack's test suite it "should escape correctly for multibyte characters" do matz_name = "\xE3\x81\xBE\xE3\x81\xA4\xE3\x82\x82\xE3\x81\xA8" # Matsumoto EscapeUtils.escape_url(matz_name).should eql('%E3%81%BE%E3%81%A4%E3%82%82%E3%81%A8') matz_name_sep = "\xE3\x81\xBE\xE3\x81\xA4 \xE3\x82\x82\xE3\x81\xA8" # Matsu moto EscapeUtils.escape_url(matz_name_sep).should eql('%E3%81%BE%E3%81%A4+%E3%82%82%E3%81%A8') end if RUBY_VERSION =~ /^1.9/ it "return value should be in original string's encoding" do str = "http://www.homerun.com/".encode('us-ascii') EscapeUtils.escape_url(str).encoding.should eql(Encoding.find('us-ascii')) str = "http://www.homerun.com/".encode('utf-8') EscapeUtils.escape_url(str).encoding.should eql(Encoding.find('utf-8')) end end endescape-utils-0.2.4/spec/url/unescape_spec.rb0000644000175000017500000000434111772525275020334 0ustar tfheentfheen# encoding: UTF-8 require File.expand_path(File.dirname(__FILE__) + '/../spec_helper.rb') describe EscapeUtils, "unescape_url" do it "should respond to unescape_url" do EscapeUtils.should respond_to(:unescape_url) end it "should unescape a basic url" do EscapeUtils.unescape_url("http%3A%2F%2Fwww.homerun.com%2F").should eql("http://www.homerun.com/") EscapeUtils.unescape_url("http://www.homerun.com/").should eql("http://www.homerun.com/") end it "should not be thrown by a standalone %" do EscapeUtils.unescape_url("%").should eql("%") end it "should not be thrown by a trailing %" do EscapeUtils.unescape_url("http%").should eql("http%") end # NOTE: from Rack's test suite it "should unescape a url containing tags" do EscapeUtils.unescape_url("fo%3Co%3Ebar").should eql("fobar") end # NOTE: from Rack's test suite it "should unescape a url with spaces" do EscapeUtils.unescape_url("a%20space").should eql("a space") EscapeUtils.unescape_url("a%20%20%20sp%20ace%20").should eql("a sp ace ") EscapeUtils.unescape_url("a+space").should eql("a space") end # NOTE: from Rack's test suite it "should unescape a string of mixed characters" do EscapeUtils.unescape_url("q1%212%22%27w%245%267%2Fz8%29%3F%5C").should eql("q1!2\"'w$5&7/z8)?\\") EscapeUtils.unescape_url("q1!2%22'w$5&7/z8)?%5C").should eql("q1!2\"'w$5&7/z8)?\\") end # NOTE: from Rack's test suite it "should unescape correctly for multibyte characters" do matz_name = "\xE3\x81\xBE\xE3\x81\xA4\xE3\x82\x82\xE3\x81\xA8" # Matsumoto EscapeUtils.unescape_url('%E3%81%BE%E3%81%A4%E3%82%82%E3%81%A8').should eql(matz_name) matz_name_sep = "\xE3\x81\xBE\xE3\x81\xA4 \xE3\x82\x82\xE3\x81\xA8" # Matsu moto EscapeUtils.unescape_url('%E3%81%BE%E3%81%A4%20%E3%82%82%E3%81%A8').should eql(matz_name_sep) end if RUBY_VERSION =~ /^1.9/ it "return value should be in original string's encoding" do str = "http%3A%2F%2Fwww.homerun.com%2F".encode('us-ascii') EscapeUtils.unescape_url(str).encoding.should eql(Encoding.find('us-ascii')) str = "http%3A%2F%2Fwww.homerun.com%2F".encode('utf-8') EscapeUtils.unescape_url(str).encoding.should eql(Encoding.find('utf-8')) end end endescape-utils-0.2.4/spec/javascript/0000755000175000017500000000000011772525275016534 5ustar tfheentfheenescape-utils-0.2.4/spec/javascript/escape_spec.rb0000644000175000017500000000235211772525275021335 0ustar tfheentfheenrequire File.expand_path(File.dirname(__FILE__) + '/../spec_helper.rb') describe EscapeUtils, "escape_javascript" do it "should respond to escape_javascript" do EscapeUtils.should respond_to(:escape_javascript) end # these are from the ActionView tests it "should return an empty string if passed nil" do EscapeUtils.escape_javascript(nil).should eql("") end it "should escape quotes and newlines" do EscapeUtils.escape_javascript(%(This "thing" is really\n netos\r\n\n')).should eql(%(This \\"thing\\" is really\\n netos\\n\\n\\')) end it "should escape backslashes" do EscapeUtils.escape_javascript(%(backslash\\test)).should eql(%(backslash\\\\test)) end it "should escape closed html tags" do EscapeUtils.escape_javascript(%(keep , but dont tags)).should eql(%(keep , but dont <\\/close> tags)) end if RUBY_VERSION =~ /^1.9/ it "return value should be in original string's encoding" do str = "dont tags".encode('us-ascii') EscapeUtils.escape_javascript(str).encoding.should eql(Encoding.find('us-ascii')) str = "dont tags".encode('utf-8') EscapeUtils.escape_javascript(str).encoding.should eql(Encoding.find('utf-8')) end end end escape-utils-0.2.4/spec/javascript/unescape_spec.rb0000644000175000017500000000251711772525275021703 0ustar tfheentfheenrequire File.expand_path(File.dirname(__FILE__) + '/../spec_helper.rb') describe EscapeUtils, "unescape_javascript" do it "should respond to unescape_javascript" do EscapeUtils.should respond_to(:unescape_javascript) end # these are from the ActionView tests it "should return an empty string if passed nil" do EscapeUtils.unescape_javascript(nil).should eql("") end it "should unescape quotes and newlines" do EscapeUtils.unescape_javascript(%(This \\"thing\\" is really\\n netos\\n\\n\\')).should eql(%(This "thing" is really\n netos\n\n')) end it "should unescape backslashes" do EscapeUtils.unescape_javascript(%(backslash\\\\test)).should eql(%(backslash\\test)) end it "should unescape closed html tags" do EscapeUtils.unescape_javascript(%(dont <\\/close> tags)).should eql(%(dont tags)) end it "should pass through standalone '\'" do EscapeUtils.unescape_javascript("\\").should eql("\\") end if RUBY_VERSION =~ /^1.9/ it "return value should be in original string's encoding" do str = "dont <\\/close> tags".encode('us-ascii') EscapeUtils.unescape_javascript(str).encoding.should eql(Encoding.find('us-ascii')) str = "dont <\\/close> tags".encode('utf-8') EscapeUtils.unescape_javascript(str).encoding.should eql(Encoding.find('utf-8')) end end endescape-utils-0.2.4/spec/query/0000755000175000017500000000000011772525275015533 5ustar tfheentfheenescape-utils-0.2.4/spec/query/escape_spec.rb0000644000175000017500000000330011772525275020326 0ustar tfheentfheenrequire File.expand_path(File.dirname(__FILE__) + '/../spec_helper.rb') describe EscapeUtils, "escape_url" do it "should respond to escape_url" do EscapeUtils.should respond_to(:escape_url) end it "should escape a basic url" do EscapeUtils.escape_url("http://www.homerun.com/").should eql("http%3A%2F%2Fwww.homerun.com%2F") end # NOTE: from Rack's test suite it "should escape a url containing tags" do EscapeUtils.escape_url("fobar").should eql("fo%3Co%3Ebar") end # NOTE: from Rack's test suite it "should escape a url with spaces" do EscapeUtils.escape_url("a space").should eql("a+space") EscapeUtils.escape_url("a sp ace ").should eql("a+++sp+ace+") end # NOTE: from Rack's test suite it "should escape a string of mixed characters" do EscapeUtils.escape_url("q1!2\"'w$5&7/z8)?\\").should eql("q1%212%22%27w%245%267%2Fz8%29%3F%5C") end # NOTE: from Rack's test suite it "should escape correctly for multibyte characters" do matz_name = "\xE3\x81\xBE\xE3\x81\xA4\xE3\x82\x82\xE3\x81\xA8" # Matsumoto EscapeUtils.escape_url(matz_name).should eql('%E3%81%BE%E3%81%A4%E3%82%82%E3%81%A8') matz_name_sep = "\xE3\x81\xBE\xE3\x81\xA4 \xE3\x82\x82\xE3\x81\xA8" # Matsu moto EscapeUtils.escape_url(matz_name_sep).should eql('%E3%81%BE%E3%81%A4+%E3%82%82%E3%81%A8') end if RUBY_VERSION =~ /^1.9/ it "return value should be in original string's encoding" do str = "http://www.homerun.com/".encode('us-ascii') EscapeUtils.escape_url(str).encoding.should eql(Encoding.find('us-ascii')) str = "http://www.homerun.com/".encode('utf-8') EscapeUtils.escape_url(str).encoding.should eql(Encoding.find('utf-8')) end end endescape-utils-0.2.4/spec/query/unescape_spec.rb0000644000175000017500000000340511772525275020677 0ustar tfheentfheen# encoding: UTF-8 require File.expand_path(File.dirname(__FILE__) + '/../spec_helper.rb') describe EscapeUtils, "unescape_url" do it "should respond to unescape_url" do EscapeUtils.should respond_to(:unescape_url) end it "should unescape a basic url" do EscapeUtils.unescape_url("http%3A%2F%2Fwww.homerun.com%2F").should eql("http://www.homerun.com/") end # NOTE: from Rack's test suite it "should unescape a url containing tags" do EscapeUtils.unescape_url("fo%3Co%3Ebar").should eql("fobar") end # NOTE: from Rack's test suite it "should unescape a url with spaces" do EscapeUtils.unescape_url("a+space").should eql("a space") EscapeUtils.unescape_url("a+++sp+ace+").should eql("a sp ace ") end # NOTE: from Rack's test suite it "should unescape a string of mixed characters" do EscapeUtils.unescape_url("q1%212%22%27w%245%267%2Fz8%29%3F%5C").should eql("q1!2\"'w$5&7/z8)?\\") end # NOTE: from Rack's test suite it "should unescape correctly for multibyte characters" do matz_name = "\xE3\x81\xBE\xE3\x81\xA4\xE3\x82\x82\xE3\x81\xA8" # Matsumoto EscapeUtils.unescape_url('%E3%81%BE%E3%81%A4%E3%82%82%E3%81%A8').should eql(matz_name) matz_name_sep = "\xE3\x81\xBE\xE3\x81\xA4 \xE3\x82\x82\xE3\x81\xA8" # Matsu moto EscapeUtils.unescape_url('%E3%81%BE%E3%81%A4+%E3%82%82%E3%81%A8').should eql(matz_name_sep) end if RUBY_VERSION =~ /^1.9/ it "return value should be in original string's encoding" do str = "http%3A%2F%2Fwww.homerun.com%2F".encode('us-ascii') EscapeUtils.unescape_url(str).encoding.should eql(Encoding.find('us-ascii')) str = "http%3A%2F%2Fwww.homerun.com%2F".encode('utf-8') EscapeUtils.unescape_url(str).encoding.should eql(Encoding.find('utf-8')) end end endescape-utils-0.2.4/spec/html/0000755000175000017500000000000011772525275015332 5ustar tfheentfheenescape-utils-0.2.4/spec/html/escape_spec.rb0000644000175000017500000000333611772525275020136 0ustar tfheentfheenrequire File.expand_path(File.dirname(__FILE__) + '/../spec_helper.rb') describe EscapeUtils, "escape_html" do it "should respond to escape_html" do EscapeUtils.should respond_to(:escape_html) end it "should escape a basic html tag, also escaping the '/' character if the secure parameter is true" do EscapeUtils.escape_html("").should eql("<some_tag/>") end it "should escape a basic html tag, not escaping the '/' character if the secure parameter is false" do EscapeUtils.escape_html("", false).should eql("<some_tag/>") end it "should escape a basic html tag, not escaping the '/' character if EscapeUtils.html_secure is false" do EscapeUtils.html_secure = false EscapeUtils.escape_html("").should eql("<some_tag/>") EscapeUtils.html_secure = true end it "should escape double-quotes" do EscapeUtils.escape_html("").should eql("<some_tag some_attr="some value"/>") end it "should escape single-quotes" do EscapeUtils.escape_html("").should eql("<some_tag some_attr='some value'/>") end it "should escape the & character" do EscapeUtils.escape_html("Bourbon & Branch").should eql("<b>Bourbon & Branch</b>") end if RUBY_VERSION =~ /^1.9/ it "return value should be in original string's encoding" do str = "Bourbon & Branch".encode('us-ascii') EscapeUtils.escape_html(str).encoding.should eql(Encoding.find('us-ascii')) str = "Bourbon & Branch".encode('utf-8') EscapeUtils.escape_html(str).encoding.should eql(Encoding.find('utf-8')) end end endescape-utils-0.2.4/spec/html/unescape_spec.rb0000644000175000017500000000266711772525275020507 0ustar tfheentfheenrequire File.expand_path(File.dirname(__FILE__) + '/../spec_helper.rb') describe EscapeUtils, "unescape_html" do it "should respond to unescape_html" do EscapeUtils.should respond_to(:unescape_html) end it "should unescape a basic html tag" do EscapeUtils.unescape_html("<some_tag/>").should eql("") end it "should unescape double-quotes" do EscapeUtils.unescape_html("<some_tag some_attr="some value"/>").should eql("") end it "should unescape single-quotes" do EscapeUtils.unescape_html("<some_tag some_attr='some value'/>").should eql("") end it "should unescape the & character" do EscapeUtils.unescape_html("<b>Bourbon & Branch</b>").should eql("Bourbon & Branch") end it "should pass through incompletely escaped tags" do EscapeUtils.unescape_html("&").should eql("&") EscapeUtils.unescape_html("<").should eql("<") end if RUBY_VERSION =~ /^1.9/ it "return value should be in original string's encoding" do str = "<b>Bourbon & Branch</b>".encode('us-ascii') EscapeUtils.unescape_html(str).encoding.should eql(Encoding.find('us-ascii')) str = "<b>Bourbon & Branch</b>".encode('utf-8') EscapeUtils.unescape_html(str).encoding.should eql(Encoding.find('utf-8')) end end endescape-utils-0.2.4/spec/html_safety_spec.rb0000644000175000017500000000153711772525275020252 0ustar tfheentfheenrequire File.expand_path(File.dirname(__FILE__) + '/spec_helper.rb') class Object def html_safe? false end end class TestSafeBuffer < String def html_safe? true end def html_safe self end def to_s self end end class String def html_safe TestSafeBuffer.new(self) end end include EscapeUtils::HtmlSafety describe EscapeUtils::HtmlSafety do it "should escape unsafe strings and make them safe" do escaped = _escape_html("unsafe") escaped.should eql("<strong>unsafe</strong>") escaped.should be_html_safe end it "shouldn't escape safe strings" do _escape_html("

safe string

".html_safe).should eql("

safe string

") end it "should work with non strings" do _escape_html(5).should eql("5") _escape_html(:hello).should eql("hello") end end escape-utils-0.2.4/spec/spec_helper.rb0000644000175000017500000000026411772525275017206 0ustar tfheentfheen$LOAD_PATH.unshift File.expand_path(File.dirname(__FILE__) + '/..') $LOAD_PATH.unshift File.expand_path(File.dirname(__FILE__) + '/../lib') require 'rspec' require 'escape_utils' escape-utils-0.2.4/CHANGELOG.md0000644000175000017500000000442211772525275015247 0ustar tfheentfheen# Changelog ## 0.2.4 (September 7th, 2011) * swap out custom escaping routines for houdini - https://github.com/tanoku/houdini * add RSTRING_NOT_MODIFIED define for a Rubinius speedup ## 0.2.3 (March 9th, 2011) * change encoding strategy to simply return strings in the encoding the input string was in, not taking into account Encoding.default_internal ## 0.2.2 (February 25th, 2011) * minor fix for Rubinius compatibility ## 0.2.1 (February 21st, 2011) * fix buffer over read in unescape_url and unescape_uri ## 0.2.0 (February 8th, 2011) * fixed a couple of compilation warnings on 1.9.3 * moved to rspec2 * remove hard-conversion to utf-8 to preserve the string's original encoding * moved to rake-compiler, Bundler * pass through incompletely escaped data on unescaping * added tilde to escape_{uri,url}specs (It's a difference between CGI.escape and URI.escape) * escape_uri and escape_url now match their Ruby counterparts ** escape_uri is used where URI.escape is, and escape_url is used where CGI.escape is used. * performance and memory usage optimizations ## 0.1.9 (October 15th, 2010) * add a flag as an optional 2nd parameter to EscapeUtils.escape_html to disable/enable the escaping of the '/' character. Defaults to the new flag EscapeUtils.html_secure ## 0.1.8 (September 29th, 2010) * fix URI escaping one last time ;) ## 0.1.7 (September 29th, 2010) * fix URI escaping to act according to the RFC * add specs for URL escaping ## 0.1.6 (September 6th, 2010) * support for URI escaping added (thanks to @joshbuddy) * bugfix to ensure we don't drop opening tags during escape_javascript (thanks to @nagybence) ## 0.1.5 (July 13th, 2010) * add URL escaping and unescaping * major refactor of HTML and Javascript escaping and unescaping logic for a decent speed up * HTML escaping now takes html_safe? into account (for Rails/ActiveSupport users) - thanks yury! ## 0.1.4 (June 9th, 2010) * ensure strings are passed in from monkey-patches ## 0.1.3 (June 9th, 2010) * cleaned some code up, removing duplication * moved to a more flexible character encoding scheme using Encoding.defaut_internal for 1.9 users ## 0.1.2 (June 8th, 2010) * forgot to add the ActionView monkey patch for JS escaping ;) ## 0.1.1 (June 8th, 2010) * added javascript escaping ## 0.1.0 (June 8th, 2010) * initial releaseescape-utils-0.2.4/MIT-LICENSE0000644000175000017500000000210211772525275015063 0ustar tfheentfheenCopyright (c) 2010-2011 Brian Lopez - http://github.com/brianmario Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.escape-utils-0.2.4/metadata.yml0000644000175000017500000001210011772525275015731 0ustar tfheentfheen--- !ruby/object:Gem::Specification name: escape_utils version: !ruby/object:Gem::Version hash: 31 prerelease: segments: - 0 - 2 - 4 version: 0.2.4 platform: ruby authors: - Brian Lopez autorequire: bindir: bin cert_chain: [] date: 2011-09-07 00:00:00 -07:00 default_executable: dependencies: - !ruby/object:Gem::Dependency name: rake-compiler prerelease: false requirement: &id001 !ruby/object:Gem::Requirement none: false requirements: - - ">=" - !ruby/object:Gem::Version hash: 9 segments: - 0 - 7 - 5 version: 0.7.5 type: :development version_requirements: *id001 - !ruby/object:Gem::Dependency name: rspec prerelease: false requirement: &id002 !ruby/object:Gem::Requirement none: false requirements: - - ">=" - !ruby/object:Gem::Version hash: 15 segments: - 2 - 0 - 0 version: 2.0.0 type: :development version_requirements: *id002 - !ruby/object:Gem::Dependency name: rack prerelease: false requirement: &id003 !ruby/object:Gem::Requirement none: false requirements: - - ">=" - !ruby/object:Gem::Version hash: 3 segments: - 0 version: "0" type: :development version_requirements: *id003 - !ruby/object:Gem::Dependency name: haml prerelease: false requirement: &id004 !ruby/object:Gem::Requirement none: false requirements: - - ">=" - !ruby/object:Gem::Version hash: 3 segments: - 0 version: "0" type: :development version_requirements: *id004 - !ruby/object:Gem::Dependency name: fast_xs prerelease: false requirement: &id005 !ruby/object:Gem::Requirement none: false requirements: - - ">=" - !ruby/object:Gem::Version hash: 3 segments: - 0 version: "0" type: :development version_requirements: *id005 - !ruby/object:Gem::Dependency name: actionpack prerelease: false requirement: &id006 !ruby/object:Gem::Requirement none: false requirements: - - ">=" - !ruby/object:Gem::Version hash: 3 segments: - 0 version: "0" type: :development version_requirements: *id006 - !ruby/object:Gem::Dependency name: url_escape prerelease: false requirement: &id007 !ruby/object:Gem::Requirement none: false requirements: - - ">=" - !ruby/object:Gem::Version hash: 3 segments: - 0 version: "0" type: :development version_requirements: *id007 description: email: seniorlopez@gmail.com executables: [] extensions: - ext/escape_utils/extconf.rb extra_rdoc_files: [] files: - .gitignore - .rspec - CHANGELOG.md - Gemfile - MIT-LICENSE - README.md - Rakefile - benchmark/html_escape.rb - benchmark/html_unescape.rb - benchmark/javascript_escape.rb - benchmark/javascript_unescape.rb - benchmark/url_escape.rb - benchmark/url_unescape.rb - escape_utils.gemspec - ext/escape_utils/buffer.c - ext/escape_utils/buffer.h - ext/escape_utils/escape_utils.c - ext/escape_utils/extconf.rb - ext/escape_utils/houdini.h - ext/escape_utils/houdini_html.c - ext/escape_utils/houdini_js.c - ext/escape_utils/houdini_uri.c - ext/escape_utils/html_unescape.h - ext/escape_utils/uri_escape.h - lib/escape_utils.rb - lib/escape_utils/html/cgi.rb - lib/escape_utils/html/erb.rb - lib/escape_utils/html/haml.rb - lib/escape_utils/html/rack.rb - lib/escape_utils/html_safety.rb - lib/escape_utils/javascript/action_view.rb - lib/escape_utils/url/cgi.rb - lib/escape_utils/url/erb.rb - lib/escape_utils/url/rack.rb - lib/escape_utils/url/uri.rb - lib/escape_utils/version.rb - spec/html/escape_spec.rb - spec/html/unescape_spec.rb - spec/html_safety_spec.rb - spec/javascript/escape_spec.rb - spec/javascript/unescape_spec.rb - spec/query/escape_spec.rb - spec/query/unescape_spec.rb - spec/rcov.opts - spec/spec_helper.rb - spec/uri/escape_spec.rb - spec/uri/unescape_spec.rb - spec/url/escape_spec.rb - spec/url/unescape_spec.rb has_rdoc: true homepage: http://github.com/brianmario/escape_utils licenses: [] post_install_message: rdoc_options: - --charset=UTF-8 require_paths: - lib - ext required_ruby_version: !ruby/object:Gem::Requirement none: false requirements: - - ">=" - !ruby/object:Gem::Version hash: 3 segments: - 0 version: "0" required_rubygems_version: !ruby/object:Gem::Requirement none: false requirements: - - ">=" - !ruby/object:Gem::Version hash: 3 segments: - 0 version: "0" requirements: [] rubyforge_project: rubygems_version: 1.6.2 signing_key: specification_version: 3 summary: Faster string escaping routines for your web apps test_files: - spec/html/escape_spec.rb - spec/html/unescape_spec.rb - spec/html_safety_spec.rb - spec/javascript/escape_spec.rb - spec/javascript/unescape_spec.rb - spec/query/escape_spec.rb - spec/query/unescape_spec.rb - spec/rcov.opts - spec/spec_helper.rb - spec/uri/escape_spec.rb - spec/uri/unescape_spec.rb - spec/url/escape_spec.rb - spec/url/unescape_spec.rb escape-utils-0.2.4/.rspec0000644000175000017500000000003711772525275014551 0ustar tfheentfheen--format documentation --colourescape-utils-0.2.4/README.md0000644000175000017500000001272711772525275014724 0ustar tfheentfheen# escape_utils Being as though we're all html escaping everything these days, why not make it faster? For character encoding in 1.9, the output string's encoding is copied from the input string. It has monkey-patches for Rack::Utils, CGI, URI, ERB::Util and Haml and ActionView so you can drop this in and have your app start escaping fast as balls in no time It supports HTML, URL, URI and Javascript escaping/unescaping. ## Installing ``` sh gem install escape_utils ``` ## Warning: UTF-8 only escape_utils assumes all input is encoded as valid UTF-8. If you are dealing with other encodings do your best to transcode the string into a UTF-8 byte stream before handing it to escape_utils. On Ruby 1.9 this is as easy as: ``` ruby utf8_string = non_utf8_string.encode('UTF-8') ``` If you're on Ruby 1.8 you can use [charlock_holmes](https://github.com/brianmario/charlock_holmes) to transcode like so: ``` ruby # NOTE: we're assuming you know the encoding of `non_utf8_string` here. # if you don't, you can use the detection API of charlock_holmes utf8_string = CharlockHolmes::Converter.convert(non_utf8_string, other_encoding, 'UTF-8') ``` ## Usage ### HTML #### Escaping ``` ruby html = `curl -s http://maps.google.com` escaped_html = EscapeUtils.escape_html(html) ``` By default escape_utils will escape `/` characters with `/`, but you can disable that by setting `EscapeUtils.html_secure = false` or per-call by passing `false` as the second parameter to `escape_html` like `EscapeUtils.escape_html(html, false)` For more information check out: http://www.owasp.org/index.php/XSS_(Cross_Site_Scripting)_Prevention_Cheat_Sheet#RULE_.231_-_HTML_Escape_Before_Inserting_Untrusted_Data_into_HTML_Element_Content #### Unescaping ``` ruby html = `curl -s http://maps.google.com` escaped_html = EscapeUtils.escape_html(html) html = EscapeUtils.unescape_html(escaped_html) ``` #### Monkey Patches ``` ruby require 'escape_utils/html/rack' # to patch Rack::Utils require 'escape_utils/html/erb' # to patch ERB::Util require 'escape_utils/html/cgi' # to patch CGI require 'escape_utils/html/haml' # to patch Haml::Helpers ``` ### URL Use (un)escape_uri to get RFC-compliant escaping (like PHP rawurlencode). Use (un)escape_url to get CGI escaping (where space is +). #### Escaping ``` ruby url = "https://www.yourmom.com/cgi-bin/session.cgi?sess_args=mcEA~!!#*YH*>@!U" escaped_url = EscapeUtils.escape_url(url) ``` #### Unescaping ``` ruby url = "https://www.yourmom.com/cgi-bin/session.cgi?sess_args=mcEA~!!#*YH*>@!U" escaped_url = EscapeUtils.escape_url(url) EscapeUtils.unescape_url(escaped_url) == url # => true ``` #### Monkey Patches ``` ruby require 'escape_utils/url/cgi' # to patch CGI require 'escape_utils/url/erb' # to patch ERB::Util require 'escape_utils/url/rack' # to patch Rack::Utils require 'escape_utils/url/uri' # to patch URI ``` ### Javascript #### Escaping ``` ruby javascript = `curl -s http://code.jquery.com/jquery-1.4.2.js` escaped_javascript = EscapeUtils.escape_javascript(javascript) ``` #### Unescaping ``` ruby javascript = `curl -s http://code.jquery.com/jquery-1.4.2.js` escaped_javascript = EscapeUtils.escape_javascript(javascript) EscapeUtils.unescape_javascript(escaped_javascript) == javascript # => true ``` #### Monkey Patches ``` ruby require 'escape_utils/javascript/action_view' # to patch ActionView::Helpers::JavaScriptHelper ``` ## Benchmarks In my testing, escaping html is around 10-30x faster than the pure ruby implementations in wide use today. While unescaping html is around 40-100x faster than CGI.unescapeHTML which is also pure ruby. Escaping Javascript is around 16-30x faster. This output is from my laptop using the benchmark scripts in the benchmarks folder. ### HTML #### Escaping ``` Rack::Utils.escape_html 9.650000 0.090000 9.740000 ( 9.750756) Haml::Helpers.html_escape 9.310000 0.110000 9.420000 ( 9.417317) ERB::Util.html_escape 5.330000 0.390000 5.720000 ( 5.748394) CGI.escapeHTML 5.370000 0.380000 5.750000 ( 5.791344) FasterHTMLEscape.html_escape 0.520000 0.010000 0.530000 ( 0.539485) fast_xs_extra#fast_xs_html 0.310000 0.030000 0.340000 ( 0.336734) EscapeUtils.escape_html 0.200000 0.050000 0.250000 ( 0.258839) ``` #### Unescaping ``` CGI.unescapeHTML 16.520000 0.080000 16.600000 ( 16.853888) EscapeUtils.unescape_html 0.120000 0.040000 0.160000 ( 0.162696) ``` ### Javascript #### Escaping ``` ActionView::Helpers::JavaScriptHelper#escape_javascript 3.810000 0.100000 3.910000 ( 3.925557) EscapeUtils.escape_javascript 0.200000 0.040000 0.240000 ( 0.236692) ``` #### Unescaping I didn't look that hard, but I'm not aware of another ruby library that does Javascript unescaping to benchmark against. Anyone know of any? ### URL #### Escaping ``` ERB::Util.url_encode 0.520000 0.010000 0.530000 ( 0.529277) Rack::Utils.escape 0.460000 0.010000 0.470000 ( 0.466962) CGI.escape 0.440000 0.000000 0.440000 ( 0.443017) URLEscape#escape 0.040000 0.000000 0.040000 ( 0.045661) fast_xs_extra#fast_xs_url 0.010000 0.000000 0.010000 ( 0.015429) EscapeUtils.escape_url 0.010000 0.000000 0.010000 ( 0.010843) ``` #### Unescaping ``` Rack::Utils.unescape 0.250000 0.010000 0.260000 ( 0.257558) CGI.unescape 0.250000 0.000000 0.250000 ( 0.257837) URLEscape#unescape 0.040000 0.000000 0.040000 ( 0.031548) fast_xs_extra#fast_uxs_cgi 0.010000 0.000000 0.010000 ( 0.006062) EscapeUtils.unescape_url 0.000000 0.000000 0.000000 ( 0.005679) ``` escape-utils-0.2.4/Rakefile0000644000175000017500000000134311772525275015102 0ustar tfheentfheen# rspec begin require 'rspec' require 'rspec/core/rake_task' desc "Run all examples with RCov" RSpec::Core::RakeTask.new('spec:rcov') do |t| t.rcov = true end RSpec::Core::RakeTask.new('spec') do |t| t.verbose = true end task :default => :spec rescue LoadError puts "rspec, or one of its dependencies, is not available. Install it with: sudo gem install rspec" end # rake-compiler require 'rake' unless defined? Rake gem 'rake-compiler', '>= 0.7.5' require "rake/extensiontask" Rake::ExtensionTask.new('escape_utils') do |ext| ext.cross_compile = true ext.cross_platform = ['x86-mingw32', 'x86-mswin32-60'] ext.lib_dir = File.join 'lib', 'escape_utils' end Rake::Task[:spec].prerequisites << :compileescape-utils-0.2.4/benchmark/0000755000175000017500000000000011772525275015366 5ustar tfheentfheenescape-utils-0.2.4/benchmark/url_unescape.rb0000644000175000017500000000224511772525275020403 0ustar tfheentfheen# encoding: utf-8 $LOAD_PATH.unshift File.expand_path(File.dirname(__FILE__) + '/..') $LOAD_PATH.unshift File.expand_path(File.dirname(__FILE__) + '/../lib') require 'rubygems' require 'benchmark' require 'rack' require 'cgi' require 'url_escape' require 'fast_xs_extra' require 'escape_utils' times = 10_000 url = "https://www.yourmom.com/cgi-bin/session.cgi?sess_args=mYHcEA dh435dqUs0moGHeeAJTSLLbdbcbd9ef----,574b95600e9ab7d27eb0bf524ac68c27----" url = url.force_encoding('us-ascii') if url.respond_to?(:force_encoding) escaped_url = EscapeUtils.escape_url(url) puts "Escaping a #{url.bytesize} byte URL #{times} times" Benchmark.bmbm do |x| x.report "Rack::Utils.unescape" do times.times do Rack::Utils.unescape(escaped_url) end end x.report "CGI.unescape" do times.times do CGI.unescape(escaped_url) end end x.report "URLEscape#unescape" do times.times do URLEscape.unescape(escaped_url) end end x.report "fast_xs_extra#fast_uxs_cgi" do times.times do url.fast_uxs_cgi end end x.report "EscapeUtils.unescape_url" do times.times do EscapeUtils.unescape_url(escaped_url) end end endescape-utils-0.2.4/benchmark/html_escape.rb0000644000175000017500000000236311772525275020203 0ustar tfheentfheen# encoding: utf-8 $LOAD_PATH.unshift File.expand_path(File.dirname(__FILE__) + '/..') $LOAD_PATH.unshift File.expand_path(File.dirname(__FILE__) + '/../lib') require 'rubygems' require 'benchmark' require 'rack' require 'erb' require 'cgi' require 'haml' require 'fast_xs_extra' require 'escape_utils' module HamlBench extend Haml::Helpers end times = 100 url = "http://en.wikipedia.org/wiki/Line_of_succession_to_the_British_throne" html = `curl -s #{url}` html = html.force_encoding('binary') if html.respond_to?(:force_encoding) puts "Escaping #{html.bytesize} bytes of html #{times} times, from #{url}" Benchmark.bmbm do |x| x.report "Rack::Utils.escape_html" do times.times do Rack::Utils.escape_html(html) end end x.report "Haml::Helpers.html_escape" do times.times do HamlBench.html_escape(html) end end x.report "ERB::Util.html_escape" do times.times do ERB::Util.html_escape(html) end end x.report "CGI.escapeHTML" do times.times do CGI.escapeHTML(html) end end x.report "fast_xs_extra#fast_xs_html" do times.times do html.fast_xs_html end end x.report "EscapeUtils.escape_html" do times.times do EscapeUtils.escape_html(html) end end endescape-utils-0.2.4/benchmark/html_unescape.rb0000644000175000017500000000155211772525275020545 0ustar tfheentfheen# encoding: utf-8 $LOAD_PATH.unshift File.expand_path(File.dirname(__FILE__) + '/..') $LOAD_PATH.unshift File.expand_path(File.dirname(__FILE__) + '/../lib') require 'rubygems' require 'benchmark' require 'cgi' require 'haml' require 'escape_utils' module HamlBench extend Haml::Helpers end times = 100 url = "http://en.wikipedia.org/wiki/Line_of_succession_to_the_British_throne" html = `curl -s #{url}` html = html.force_encoding('binary') if html.respond_to?(:force_encoding) escaped_html = EscapeUtils.escape_html(html) puts "Unescaping #{escaped_html.bytesize} bytes of escaped html #{times} times, from #{url}" Benchmark.bmbm do |x| x.report "CGI.unescapeHTML" do times.times do CGI.unescapeHTML(escaped_html) end end x.report "EscapeUtils.unescape_html" do times.times do EscapeUtils.unescape_html(escaped_html) end end endescape-utils-0.2.4/benchmark/javascript_unescape.rb0000644000175000017500000000137511772525275021752 0ustar tfheentfheen# encoding: utf-8 $LOAD_PATH.unshift File.expand_path(File.dirname(__FILE__) + '/..') $LOAD_PATH.unshift File.expand_path(File.dirname(__FILE__) + '/../lib') require 'rubygems' require 'benchmark' require 'escape_utils' times = 100 url = "http://ajax.googleapis.com/ajax/libs/dojo/1.4.3/dojo/dojo.xd.js.uncompressed.js" javascript = `curl -s #{url}` javascript = javascript.force_encoding('utf-8') if javascript.respond_to?(:force_encoding) escaped_javascript = EscapeUtils.escape_javascript(javascript) puts "Escaping #{escaped_javascript.bytesize} bytes of javascript #{times} times, from #{url}" Benchmark.bmbm do |x| x.report "EscapeUtils.escape_javascript" do times.times do EscapeUtils.unescape_javascript(escaped_javascript) end end endescape-utils-0.2.4/benchmark/javascript_escape.rb0000644000175000017500000000165211772525275021405 0ustar tfheentfheen# encoding: utf-8 $LOAD_PATH.unshift File.expand_path(File.dirname(__FILE__) + '/..') $LOAD_PATH.unshift File.expand_path(File.dirname(__FILE__) + '/../lib') require 'rubygems' require 'benchmark' require 'action_view' require 'escape_utils' class ActionPackBench extend ActionView::Helpers::JavaScriptHelper end times = 100 url = "http://ajax.googleapis.com/ajax/libs/dojo/1.4.3/dojo/dojo.xd.js.uncompressed.js" javascript = `curl -s #{url}` javascript = javascript.force_encoding('utf-8') if javascript.respond_to?(:force_encoding) puts "Escaping #{javascript.bytesize} bytes of javascript #{times} times, from #{url}" Benchmark.bmbm do |x| x.report "ActionView::Helpers::JavaScriptHelper#escape_javascript" do times.times do ActionPackBench.escape_javascript(javascript) end end x.report "EscapeUtils.escape_javascript" do times.times do EscapeUtils.escape_javascript(javascript) end end endescape-utils-0.2.4/benchmark/url_escape.rb0000644000175000017500000000227211772525275020040 0ustar tfheentfheen# encoding: utf-8 $LOAD_PATH.unshift File.expand_path(File.dirname(__FILE__) + '/..') $LOAD_PATH.unshift File.expand_path(File.dirname(__FILE__) + '/../lib') require 'rubygems' require 'benchmark' require 'rack' require 'erb' require 'cgi' require 'url_escape' require 'fast_xs_extra' require 'escape_utils' times = 10_000 url = "https://www.yourmom.com/cgi-bin/session.cgi?sess_args=mYHcEA dh435dqUs0moGHeeAJTSLLbdbcbd9ef----,574b95600e9ab7d27eb0bf524ac68c27----" url = url.force_encoding('us-ascii') if url.respond_to?(:force_encoding) puts "Escaping a #{url.bytesize} byte URL #{times} times" Benchmark.bmbm do |x| x.report "ERB::Util.url_encode" do times.times do ERB::Util.url_encode(url) end end x.report "Rack::Utils.escape" do times.times do Rack::Utils.escape(url) end end x.report "CGI.escape" do times.times do CGI.escape(url) end end x.report "URLEscape#escape" do times.times do URLEscape.escape(url) end end x.report "fast_xs_extra#fast_xs_url" do times.times do url.fast_xs_url end end x.report "EscapeUtils.escape_url" do times.times do EscapeUtils.escape_url(url) end end endescape-utils-0.2.4/.gitignore0000644000175000017500000000010211772525275015415 0ustar tfheentfheenMakefile *.o *.bundle pkg/* doc/* *.rbc tmp/ Gemfile.lock vendor/*escape-utils-0.2.4/ext/0000755000175000017500000000000011772525275014234 5ustar tfheentfheenescape-utils-0.2.4/ext/escape_utils/0000755000175000017500000000000011772525275016714 5ustar tfheentfheenescape-utils-0.2.4/ext/escape_utils/houdini_html.c0000644000175000017500000001066011772525275021546 0ustar tfheentfheen#include #include #include #include "houdini.h" #include "html_unescape.h" #define ESCAPE_GROW_FACTOR(x) (((x) * 12) / 10) /* this is very scientific, yes */ #define UNESCAPE_GROW_FACTOR(x) (x) /* unescaping shouldn't grow our buffer */ /* Helper _isdigit methods -- do not trust the current locale */ int _isxdigit(int c) { return strchr("0123456789ABCDEFabcdef", c) != NULL; } int _isdigit(int c) { return (c >= '0' && c <= '9'); } /** * According to the OWASP rules: * * & --> & * < --> < * > --> > * " --> " * ' --> ' ' is not recommended * / --> / forward slash is included as it helps end an HTML entity * */ static const char HTML_ESCAPE_TABLE[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; static const char *HTML_ESCAPES[] = { "", """, "&", "'", "/", "<", ">" }; void houdini_escape_html(struct buf *ob, const uint8_t *src, size_t size, int secure) { size_t i = 0, org, esc; bufgrow(ob, ESCAPE_GROW_FACTOR(size)); while (i < size) { org = i; while (i < size && (esc = HTML_ESCAPE_TABLE[src[i]]) == 0) i++; if (i > org) bufput(ob, src + org, i - org); /* escaping */ if (i >= size) break; /* The forward slash is only escaped in secure mode */ if (src[i] == '/' && !secure) { bufputc(ob, '/'); } else { bufputs(ob, HTML_ESCAPES[esc]); } i++; } } static inline void bufput_utf8(struct buf *ob, int c) { unsigned char unichar[4]; if (c < 0x80) { bufputc(ob, c); } else if (c < 0x800) { unichar[0] = 192 + (c / 64); unichar[1] = 128 + (c % 64); bufput(ob, unichar, 2); } else if (c - 0xd800u < 0x800) { bufputc(ob, '?'); } else if (c < 0x10000) { unichar[0] = 224 + (c / 4096); unichar[1] = 128 + (c / 64) % 64; unichar[2] = 128 + (c % 64); bufput(ob, unichar, 3); } else if (c < 0x110000) { unichar[0] = 240 + (c / 262144); unichar[1] = 128 + (c / 4096) % 64; unichar[2] = 128 + (c / 64) % 64; unichar[3] = 128 + (c % 64); bufput(ob, unichar, 4); } else { bufputc(ob, '?'); } } static size_t unescape_ent(struct buf *ob, const uint8_t *src, size_t size) { size_t i = 0; if (size > 3 && src[0] == '#') { int codepoint = 0; if (_isdigit(src[1])) { for (i = 1; i < size && _isdigit(src[i]); ++i) codepoint = (codepoint * 10) + (src[i] - '0'); } else if (src[1] == 'x' || src[1] == 'X') { for (i = 2; i < size && _isxdigit(src[i]); ++i) codepoint = (codepoint * 16) + ((src[i] | 32) % 39 - 9); } if (i < size && src[i] == ';') { bufput_utf8(ob, codepoint); return i + 1; } } else { if (size > MAX_WORD_LENGTH) size = MAX_WORD_LENGTH; for (i = MIN_WORD_LENGTH; i < size; ++i) { if (src[i] == ' ') break; if (src[i] == ';') { const struct html_ent *entity = find_entity((char *)src, i); if (entity != NULL) { bufput(ob, entity->utf8, entity->utf8_len); return i + 1; } break; } } } bufputc(ob, '&'); return 0; } void houdini_unescape_html(struct buf *ob, const uint8_t *src, size_t size) { size_t i = 0, org; bufgrow(ob, UNESCAPE_GROW_FACTOR(size)); while (i < size) { org = i; while (i < size && src[i] != '&') i++; if (i > org) bufput(ob, src + org, i - org); /* escaping */ if (i >= size) break; i++; i += unescape_ent(ob, src + i, size - i); } } #ifdef TEST int main() { const char TEST_STRING[] = "This ♣ is & just "an example♦""; struct buf *buffer; buffer = bufnew(128); houdini_unescape_html(buffer, TEST_STRING, strlen(TEST_STRING)); printf("Result: %.*s\n", (int)buffer->size, buffer->data); bufrelease(buffer); return 0; } #endif escape-utils-0.2.4/ext/escape_utils/buffer.c0000644000175000017500000001066711772525275020343 0ustar tfheentfheen/* * Copyright (c) 2008, Natacha Porté * Copyright (c) 2011, Vicent Martí * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ #define BUFFER_MAX_ALLOC_SIZE (1024 * 1024 * 16) //16mb #include "buffer.h" #include #include #include /* MSVC compat */ #if defined(_MSC_VER) # define _buf_vsnprintf _vsnprintf #else # define _buf_vsnprintf vsnprintf #endif int bufprefix(const struct buf *buf, const char *prefix) { size_t i; for (i = 0; i < buf->size; ++i) { if (prefix[i] == 0) return 0; if (buf->data[i] != prefix[i]) return buf->data[i] - prefix[i]; } return 0; } /* bufgrow: increasing the allocated size to the given value */ int bufgrow(struct buf *buf, size_t neosz) { size_t neoasz; void *neodata; if (!buf || !buf->unit || neosz > BUFFER_MAX_ALLOC_SIZE) return BUF_ENOMEM; if (buf->asize >= neosz) return BUF_OK; neoasz = buf->asize + buf->unit; while (neoasz < neosz) neoasz += buf->unit; neodata = realloc(buf->data, neoasz); if (!neodata) return BUF_ENOMEM; buf->data = neodata; buf->asize = neoasz; return BUF_OK; } /* bufnew: allocation of a new buffer */ struct buf * bufnew(size_t unit) { struct buf *ret; ret = malloc(sizeof (struct buf)); if (ret) { ret->data = 0; ret->size = ret->asize = 0; ret->unit = unit; } return ret; } /* bufnullterm: NULL-termination of the string array */ const char * bufcstr(struct buf *buf) { if (!buf || !buf->unit) return NULL; if (buf->size < buf->asize && buf->data[buf->size] == 0) return (char *)buf->data; if (buf->size + 1 <= buf->asize || bufgrow(buf, buf->size + 1) == 0) { buf->data[buf->size] = 0; return (char *)buf->data; } return NULL; } /* bufprintf: formatted printing to a buffer */ void bufprintf(struct buf *buf, const char *fmt, ...) { va_list ap; if (!buf || !buf->unit) return; va_start(ap, fmt); vbufprintf(buf, fmt, ap); va_end(ap); } /* bufput: appends raw data to a buffer */ void bufput(struct buf *buf, const void *data, size_t len) { if (!buf) return; if (buf->size + len > buf->asize && bufgrow(buf, buf->size + len) < 0) return; memcpy(buf->data + buf->size, data, len); buf->size += len; } /* bufputs: appends a NUL-terminated string to a buffer */ void bufputs(struct buf *buf, const char *str) { bufput(buf, str, strlen(str)); } /* bufputc: appends a single uint8_t to a buffer */ void bufputc(struct buf *buf, int c) { if (!buf) return; if (buf->size + 1 > buf->asize && bufgrow(buf, buf->size + 1) < 0) return; buf->data[buf->size] = c; buf->size += 1; } /* bufrelease: decrease the reference count and free the buffer if needed */ void bufrelease(struct buf *buf) { if (!buf) return; free(buf->data); free(buf); } /* bufreset: frees internal data of the buffer */ void bufreset(struct buf *buf) { if (!buf) return; free(buf->data); buf->data = NULL; buf->size = buf->asize = 0; } /* bufslurp: removes a given number of bytes from the head of the array */ void bufslurp(struct buf *buf, size_t len) { if (!buf || !buf->unit || len <= 0) return; if (len >= buf->size) { buf->size = 0; return; } buf->size -= len; memmove(buf->data, buf->data + len, buf->size); } /* vbufprintf: stdarg variant of formatted printing into a buffer */ void vbufprintf(struct buf *buf, const char *fmt, va_list ap) { int n; if (buf == 0 || (buf->size >= buf->asize && bufgrow(buf, buf->size + 1)) < 0) return; n = _buf_vsnprintf((char *)buf->data + buf->size, buf->asize - buf->size, fmt, ap); if (n < 0) { #ifdef _MSC_VER n = _vscprintf(fmt, ap); #else return; #endif } if ((size_t)n >= buf->asize - buf->size) { if (bufgrow(buf, buf->size + n + 1) < 0) return; n = _buf_vsnprintf((char *)buf->data + buf->size, buf->asize - buf->size, fmt, ap); } if (n < 0) return; buf->size += n; } escape-utils-0.2.4/ext/escape_utils/extconf.rb0000644000175000017500000000055211772525275020711 0ustar tfheentfheen# encoding: UTF-8 require 'mkmf' require 'rbconfig' $CFLAGS << ' -Wall -funroll-loops' $CFLAGS << ' -Wextra -O0 -ggdb3' if ENV['DEBUG'] if try_compile(< int main(void) { rb_cvar_set(Qnil, Qnil, Qnil); return 0; } SRC $CFLAGS << " -DRB_CVAR_SET_ARITY=3 " else $CFLAGS << " -DRB_CVAR_SET_ARITY=4 " end create_makefile("escape_utils") escape-utils-0.2.4/ext/escape_utils/escape_utils.c0000644000175000017500000000732611772525275021550 0ustar tfheentfheen// tell rbx not to use it's caching compat layer // by doing this we're making a promise to RBX that // we'll never modify the pointers we get back from RSTRING_PTR #define RSTRING_NOT_MODIFIED #include #if RB_CVAR_SET_ARITY == 4 # define rb_cvar_set(a,b,c) rb_cvar_set(a,b,c,0) #endif #ifdef HAVE_RUBY_ENCODING_H #include #endif #include "houdini.h" typedef void (*houdini_cb)(struct buf *, const uint8_t *, size_t); static VALUE rb_mEscapeUtils; /** * html_secure instance variable */ static ID rb_html_secure; static int g_html_secure = 1; static VALUE rb_eu_get_html_secure(VALUE self) { return rb_cvar_get(self, rb_html_secure); } static VALUE rb_eu_set_html_secure(VALUE self, VALUE val) { g_html_secure = RTEST(val); rb_cvar_set(self, rb_html_secure, val); return val; } /** * Generic template */ static VALUE rb_eu__generic( VALUE self, VALUE str, houdini_cb callback, size_t chunk_size) { VALUE result; struct buf *out_buf; if (NIL_P(str)) return rb_str_new2(""); Check_Type(str, T_STRING); out_buf = bufnew(chunk_size); callback(out_buf, (uint8_t *)RSTRING_PTR(str), RSTRING_LEN(str)); result = rb_str_new((char *)out_buf->data, out_buf->size); bufrelease(out_buf); #ifdef HAVE_RUBY_ENCODING_H rb_enc_copy(result, str); #endif return result; } /** * HTML methods */ static VALUE rb_eu_escape_html(int argc, VALUE *argv, VALUE self) { VALUE rb_out_buf, str, rb_secure; struct buf *out_buf; int secure = g_html_secure; if (rb_scan_args(argc, argv, "11", &str, &rb_secure) == 2) { if (rb_secure == Qfalse) { secure = 0; } } Check_Type(str, T_STRING); out_buf = bufnew(128); houdini_escape_html(out_buf, (uint8_t *)RSTRING_PTR(str), RSTRING_LEN(str), secure); rb_out_buf = rb_str_new((char *)out_buf->data, out_buf->size); bufrelease(out_buf); #ifdef HAVE_RUBY_ENCODING_H rb_enc_copy(rb_out_buf, str); #endif return rb_out_buf; } static VALUE rb_eu_unescape_html(VALUE self, VALUE str) { return rb_eu__generic(self, str, &houdini_unescape_html, 128); } /** * JavaScript methods */ static VALUE rb_eu_escape_js(VALUE self, VALUE str) { return rb_eu__generic(self, str, &houdini_escape_js, 128); } static VALUE rb_eu_unescape_js(VALUE self, VALUE str) { return rb_eu__generic(self, str, &houdini_unescape_js, 128); } /** * URL methods */ static VALUE rb_eu_escape_url(VALUE self, VALUE str) { return rb_eu__generic(self, str, &houdini_escape_url, 32); } static VALUE rb_eu_unescape_url(VALUE self, VALUE str) { return rb_eu__generic(self, str, &houdini_unescape_url, 32); } /** * URI methods */ static VALUE rb_eu_escape_uri(VALUE self, VALUE str) { return rb_eu__generic(self, str, &houdini_escape_uri, 32); } static VALUE rb_eu_unescape_uri(VALUE self, VALUE str) { return rb_eu__generic(self, str, &houdini_unescape_uri, 32); } /** * Ruby Extension initializer */ void Init_escape_utils() { rb_mEscapeUtils = rb_define_module("EscapeUtils"); rb_define_method(rb_mEscapeUtils, "escape_html", rb_eu_escape_html, -1); rb_define_method(rb_mEscapeUtils, "unescape_html", rb_eu_unescape_html, 1); rb_define_method(rb_mEscapeUtils, "escape_javascript", rb_eu_escape_js, 1); rb_define_method(rb_mEscapeUtils, "unescape_javascript", rb_eu_unescape_js, 1); rb_define_method(rb_mEscapeUtils, "escape_url", rb_eu_escape_url, 1); rb_define_method(rb_mEscapeUtils, "unescape_url", rb_eu_unescape_url, 1); rb_define_method(rb_mEscapeUtils, "escape_uri", rb_eu_escape_uri, 1); rb_define_method(rb_mEscapeUtils, "unescape_uri", rb_eu_unescape_uri, 1); rb_define_singleton_method(rb_mEscapeUtils, "html_secure", rb_eu_get_html_secure, 0); rb_define_singleton_method(rb_mEscapeUtils, "html_secure=", rb_eu_set_html_secure, 1); rb_html_secure = rb_intern("@@html_secure"); } escape-utils-0.2.4/ext/escape_utils/houdini.h0000644000175000017500000000134611772525275020530 0ustar tfheentfheen#ifndef __HOUDINI_H__ #define __HOUDINI_H__ #include "buffer.h" extern void houdini_escape_html(struct buf *ob, const uint8_t *src, size_t size, int secure); extern void houdini_unescape_html(struct buf *ob, const uint8_t *src, size_t size); extern void houdini_escape_uri(struct buf *ob, const uint8_t *src, size_t size); extern void houdini_escape_url(struct buf *ob, const uint8_t *src, size_t size); extern void houdini_unescape_uri(struct buf *ob, const uint8_t *src, size_t size); extern void houdini_unescape_url(struct buf *ob, const uint8_t *src, size_t size); extern void houdini_escape_js(struct buf *ob, const uint8_t *src, size_t size); extern void houdini_unescape_js(struct buf *ob, const uint8_t *src, size_t size); #endif escape-utils-0.2.4/ext/escape_utils/houdini_js.c0000644000175000017500000000530311772525275021214 0ustar tfheentfheen#include #include #include #include "houdini.h" #define ESCAPE_GROW_FACTOR(x) (((x) * 12) / 10) #define UNESCAPE_GROW_FACTOR(x) (x) void houdini_unescape_js(struct buf *ob, const uint8_t *src, size_t size) { size_t i = 0, org, ch; bufgrow(ob, UNESCAPE_GROW_FACTOR(size)); while (i < size) { org = i; while (i < size && src[i] != '\\') i++; if (i > org) bufput(ob, src + org, i - org); /* escaping */ if (i == size) break; if (++i == size) { bufputc(ob, '\\'); break; } ch = src[i]; switch (ch) { case 'n': ch = '\n'; /* pass through */ case '\\': case '\'': case '\"': case '/': bufputc(ob, ch); i++; break; default: bufputc(ob, '\\'); break; } } } static const char JS_ESCAPE[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; void houdini_escape_js(struct buf *ob, const uint8_t *src, size_t size) { size_t i = 0, org, ch; bufgrow(ob, ESCAPE_GROW_FACTOR(size)); while (i < size) { org = i; while (i < size && JS_ESCAPE[src[i]] == 0) i++; if (i > org) bufput(ob, src + org, i - org); /* escaping */ if (i >= size) break; ch = src[i]; switch (ch) { case '/': /* * Escape only if preceded by a lt */ if (i && src[i - 1] == '<') bufputc(ob, '\\'); bufputc(ob, ch); break; case '\r': /* * Escape as \n, and skip the next \n if it's there */ if (i + 1 < size && src[i + 1] == '\n') i++; case '\n': /* * Escape actually as '\','n', not as '\', '\n' */ ch = 'n'; default: /* * Normal escaping */ bufputc(ob, '\\'); bufputc(ob, ch); break; } i++; } } //#define TEST #ifdef TEST int main() { const char TEST_STRING[] = "http% this \200 is a test"; struct buf *buffer; buffer = bufnew(128); houdini_escape_uri(buffer, TEST_STRING, strlen(TEST_STRING)); printf("Result: %.*s\n", (int)buffer->size, buffer->data); bufrelease(buffer); return 0; } #endif escape-utils-0.2.4/ext/escape_utils/houdini_uri.c0000644000175000017500000000447111772525275021404 0ustar tfheentfheen#include #include #include #include "houdini.h" #include "uri_escape.h" #define ESCAPE_GROW_FACTOR(x) (((x) * 12) / 10) #define UNESCAPE_GROW_FACTOR(x) (x) extern int _isxdigit(int c); static void escape(struct buf *ob, const uint8_t *src, size_t size, int is_url) { static const char hex_chars[] = "0123456789ABCDEF"; const char *safe_table = is_url ? URL_SAFE : URI_SAFE; size_t i = 0, org; char hex_str[3]; bufgrow(ob, ESCAPE_GROW_FACTOR(size)); hex_str[0] = '%'; while (i < size) { org = i; while (i < size && safe_table[src[i]] != 0) i++; if (i > org) bufput(ob, src + org, i - org); /* escaping */ if (i >= size) break; if (src[i] == ' ' && is_url) { bufputc(ob, '+'); } else { hex_str[1] = hex_chars[(src[i] >> 4) & 0xF]; hex_str[2] = hex_chars[src[i] & 0xF]; bufput(ob, hex_str, 3); } i++; } } #define hex2c(c) ((c | 32) % 39 - 9) static void unescape(struct buf *ob, const uint8_t *src, size_t size, int is_url) { size_t i = 0, org; bufgrow(ob, UNESCAPE_GROW_FACTOR(size)); while (i < size) { org = i; while (i < size && src[i] != '%') i++; if (i > org) bufput(ob, src + org, i - org); /* escaping */ if (i >= size) break; i++; if (i + 1 < size && _isxdigit(src[i]) && _isxdigit(src[i + 1])) { unsigned char new_char = (hex2c(src[i]) << 4) + hex2c(src[i + 1]); bufputc(ob, new_char); i += 2; } else { bufputc(ob, '%'); } } if (is_url) { char *find = (char *)bufcstr(ob); while ((find = strchr(find, '+')) != NULL) *find = ' '; } } void houdini_escape_uri(struct buf *ob, const uint8_t *src, size_t size) { return escape(ob, src, size, 0); } void houdini_escape_url(struct buf *ob, const uint8_t *src, size_t size) { return escape(ob, src, size, 1); } void houdini_unescape_uri(struct buf *ob, const uint8_t *src, size_t size) { return unescape(ob, src, size, 0); } void houdini_unescape_url(struct buf *ob, const uint8_t *src, size_t size) { return unescape(ob, src, size, 1); } //#define TEST #ifdef TEST int main() { const char TEST_STRING[] = "http% this \200 is a test"; struct buf *buffer; buffer = bufnew(128); houdini_escape_uri(buffer, TEST_STRING, strlen(TEST_STRING)); printf("Result: %.*s\n", (int)buffer->size, buffer->data); bufrelease(buffer); return 0; } #endif escape-utils-0.2.4/ext/escape_utils/uri_escape.h0000644000175000017500000000320711772525275021206 0ustar tfheentfheenstatic const char URL_SAFE[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; static const char URI_SAFE[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; escape-utils-0.2.4/ext/escape_utils/html_unescape.h0000644000175000017500000006452711772525275021732 0ustar tfheentfheen/* C code produced by gperf version 3.0.3 */ /* Command-line: gperf -t -N find_entity -H hash_entity -K entity -C -l --null-strings -m100 html_unescape.gperf */ /* Computed positions: -k'1-3,5,$' */ #if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \ && ('%' == 37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) \ && (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44) \ && ('-' == 45) && ('.' == 46) && ('/' == 47) && ('0' == 48) \ && ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) \ && ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) \ && ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) \ && ('=' == 61) && ('>' == 62) && ('?' == 63) && ('A' == 65) \ && ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \ && ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) \ && ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) \ && ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) \ && ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) \ && ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89) \ && ('Z' == 90) && ('[' == 91) && ('\\' == 92) && (']' == 93) \ && ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98) \ && ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \ && ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \ && ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \ && ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \ && ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \ && ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \ && ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126)) /* The character set is not based on ISO-646. */ error "gperf generated tables don't work with this execution character set. Please report a bug to ." #endif #line 1 "html_unescape.gperf" struct html_ent { const char *entity; unsigned char utf8_len; unsigned char utf8[3]; }; #define TOTAL_KEYWORDS 252 #define MIN_WORD_LENGTH 2 #define MAX_WORD_LENGTH 8 #define MIN_HASH_VALUE 10 #define MAX_HASH_VALUE 418 /* maximum key range = 409, duplicates = 0 */ #ifdef __GNUC__ __inline #else #ifdef __cplusplus inline #endif #endif static unsigned int hash_entity (str, len) register const char *str; register unsigned int len; { static const unsigned short asso_values[] = { 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 12, 29, 24, 1, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 45, 137, 28, 17, 87, 3, 16, 8, 103, 419, 7, 11, 1, 5, 76, 116, 419, 1, 9, 16, 86, 419, 419, 9, 5, 2, 419, 419, 419, 419, 419, 419, 2, 28, 26, 4, 3, 109, 87, 141, 4, 197, 1, 36, 85, 12, 1, 1, 189, 55, 17, 6, 34, 61, 10, 5, 110, 11, 1, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419, 419 }; register int hval = len; switch (hval) { default: hval += asso_values[(unsigned char)str[4]]; /*FALLTHROUGH*/ case 4: case 3: hval += asso_values[(unsigned char)str[2]]; /*FALLTHROUGH*/ case 2: hval += asso_values[(unsigned char)str[1]+1]; /*FALLTHROUGH*/ case 1: hval += asso_values[(unsigned char)str[0]]; break; } return hval + asso_values[(unsigned char)str[len - 1]]; } #ifdef __GNUC__ __inline #ifdef __GNUC_STDC_INLINE__ __attribute__ ((__gnu_inline__)) #endif #endif const struct html_ent * find_entity (str, len) register const char *str; register unsigned int len; { static const unsigned char lengthtable[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 3, 0, 3, 4, 3, 3, 3, 0, 5, 6, 4, 5, 4, 4, 3, 5, 4, 4, 5, 5, 6, 0, 5, 4, 6, 5, 5, 3, 6, 3, 3, 5, 0, 0, 5, 5, 0, 5, 6, 6, 0, 0, 6, 0, 0, 3, 4, 0, 3, 6, 3, 6, 5, 5, 5, 5, 6, 6, 6, 6, 6, 2, 6, 5, 2, 6, 0, 6, 0, 3, 4, 6, 4, 0, 0, 0, 4, 7, 3, 0, 4, 4, 6, 5, 2, 5, 5, 5, 2, 6, 6, 3, 0, 4, 8, 2, 5, 4, 4, 4, 6, 3, 4, 0, 0, 0, 5, 3, 6, 4, 4, 5, 2, 6, 3, 2, 4, 3, 4, 3, 5, 4, 6, 3, 5, 5, 5, 5, 4, 5, 5, 6, 4, 6, 5, 4, 2, 5, 5, 0, 0, 6, 6, 4, 5, 6, 5, 6, 4, 6, 0, 4, 7, 4, 5, 6, 4, 5, 6, 0, 0, 6, 4, 0, 4, 6, 3, 0, 2, 6, 5, 6, 4, 4, 4, 4, 4, 4, 3, 0, 0, 5, 6, 4, 4, 7, 0, 2, 5, 0, 2, 5, 4, 6, 2, 5, 5, 6, 2, 4, 0, 2, 5, 0, 0, 5, 4, 6, 0, 6, 4, 0, 3, 5, 0, 4, 0, 4, 0, 5, 6, 5, 0, 0, 5, 5, 6, 5, 5, 6, 3, 5, 3, 0, 0, 0, 5, 3, 0, 0, 5, 4, 0, 5, 4, 0, 5, 4, 4, 5, 7, 5, 0, 6, 6, 6, 6, 0, 4, 4, 0, 6, 0, 0, 0, 5, 0, 6, 6, 4, 0, 4, 0, 4, 0, 4, 3, 0, 0, 0, 5, 7, 4, 6, 0, 6, 6, 0, 5, 0, 5, 0, 4, 0, 4, 0, 5, 6, 0, 3, 0, 5, 0, 0, 0, 2, 0, 0, 3, 3, 0, 5, 5, 5, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 3, 6, 0, 0, 0, 0, 7, 7, 0, 0, 0, 6, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 6 }; static const struct html_ent wordlist[] = { {(char*)0}, {(char*)0}, {(char*)0}, {(char*)0}, {(char*)0}, {(char*)0}, {(char*)0}, {(char*)0}, {(char*)0}, {(char*)0}, #line 132 "html_unescape.gperf" {"Rho", 2, { 0xCE, 0xA1 }}, {(char*)0}, {(char*)0}, {(char*)0}, #line 226 "html_unescape.gperf" {"and", 3, { 0xE2, 0x88, 0xA7 }}, {(char*)0}, #line 161 "html_unescape.gperf" {"phi", 2, { 0xCF, 0x86 }}, #line 148 "html_unescape.gperf" {"iota", 2, { 0xCE, 0xB9 }}, #line 163 "html_unescape.gperf" {"psi", 2, { 0xCF, 0x88 }}, #line 8 "html_unescape.gperf" {"amp", 1, { 0x26 }}, #line 230 "html_unescape.gperf" {"int", 3, { 0xE2, 0x88, 0xAB }}, {(char*)0}, #line 147 "html_unescape.gperf" {"theta", 2, { 0xCE, 0xB8 }}, #line 231 "html_unescape.gperf" {"there4", 3, { 0xE2, 0x88, 0xB4 }}, #line 223 "html_unescape.gperf" {"prop", 3, { 0xE2, 0x88, 0x9D }}, #line 164 "html_unescape.gperf" {"omega", 2, { 0xCF, 0x89 }}, #line 167 "html_unescape.gperf" {"ensp", 3, { 0xE2, 0x80, 0x82 }}, #line 218 "html_unescape.gperf" {"prod", 3, { 0xE2, 0x88, 0x8F }}, #line 24 "html_unescape.gperf" {"not", 2, { 0xC2, 0xAC }}, #line 194 "html_unescape.gperf" {"image", 3, { 0xE2, 0x84, 0x91 }}, #line 215 "html_unescape.gperf" {"isin", 3, { 0xE2, 0x88, 0x88 }}, #line 247 "html_unescape.gperf" {"sdot", 3, { 0xE2, 0x8B, 0x85 }}, #line 123 "html_unescape.gperf" {"Theta", 2, { 0xCE, 0x98 }}, #line 187 "html_unescape.gperf" {"prime", 3, { 0xE2, 0x80, 0xB2 }}, #line 110 "html_unescape.gperf" {"Scaron", 2, { 0xC5, 0xA0 }}, {(char*)0}, #line 197 "html_unescape.gperf" {"trade", 3, { 0xE2, 0x84, 0xA2 }}, #line 168 "html_unescape.gperf" {"emsp", 3, { 0xE2, 0x80, 0x83 }}, #line 169 "html_unescape.gperf" {"thinsp", 3, { 0xE2, 0x80, 0x89 }}, #line 149 "html_unescape.gperf" {"kappa", 2, { 0xCE, 0xBA }}, #line 106 "html_unescape.gperf" {"thorn", 2, { 0xC3, 0xBE }}, #line 162 "html_unescape.gperf" {"chi", 2, { 0xCF, 0x87 }}, #line 111 "html_unescape.gperf" {"scaron", 2, { 0xC5, 0xA1 }}, #line 137 "html_unescape.gperf" {"Chi", 2, { 0xCE, 0xA7 }}, #line 146 "html_unescape.gperf" {"eta", 2, { 0xCE, 0xB7 }}, #line 125 "html_unescape.gperf" {"Kappa", 2, { 0xCE, 0x9A }}, {(char*)0}, {(char*)0}, #line 216 "html_unescape.gperf" {"notin", 3, { 0xE2, 0x88, 0x89 }}, #line 15 "html_unescape.gperf" {"pound", 2, { 0xC2, 0xA3 }}, {(char*)0}, #line 32 "html_unescape.gperf" {"acute", 2, { 0xC2, 0xB4 }}, #line 97 "html_unescape.gperf" {"otilde", 2, { 0xC3, 0xB5 }}, #line 79 "html_unescape.gperf" {"atilde", 2, { 0xC3, 0xA3 }}, {(char*)0}, {(char*)0}, #line 61 "html_unescape.gperf" {"Ntilde", 2, { 0xC3, 0x91 }}, {(char*)0}, {(char*)0}, #line 228 "html_unescape.gperf" {"cap", 3, { 0xE2, 0x88, 0xA9 }}, #line 12 "html_unescape.gperf" {"nbsp", 2, { 0xC2, 0xA0 }}, {(char*)0}, #line 254 "html_unescape.gperf" {"loz", 3, { 0xE2, 0x97, 0x8A }}, #line 93 "html_unescape.gperf" {"ntilde", 2, { 0xC3, 0xB1 }}, #line 156 "html_unescape.gperf" {"rho", 2, { 0xCF, 0x81 }}, #line 245 "html_unescape.gperf" {"otimes", 3, { 0xE2, 0x8A, 0x97 }}, #line 96 "html_unescape.gperf" {"ocirc", 2, { 0xC3, 0xB4 }}, #line 78 "html_unescape.gperf" {"acirc", 2, { 0xC3, 0xA2 }}, #line 86 "html_unescape.gperf" {"ecirc", 2, { 0xC3, 0xAA }}, #line 90 "html_unescape.gperf" {"icirc", 2, { 0xC3, 0xAE }}, #line 95 "html_unescape.gperf" {"oacute", 2, { 0xC3, 0xB3 }}, #line 77 "html_unescape.gperf" {"aacute", 2, { 0xC3, 0xA1 }}, #line 85 "html_unescape.gperf" {"eacute", 2, { 0xC3, 0xA9 }}, #line 89 "html_unescape.gperf" {"iacute", 2, { 0xC3, 0xAD }}, #line 73 "html_unescape.gperf" {"Yacute", 2, { 0xC3, 0x9D }}, #line 227 "html_unescape.gperf" {"or", 3, { 0xE2, 0x88, 0xA8 }}, #line 221 "html_unescape.gperf" {"lowast", 3, { 0xE2, 0x88, 0x97 }}, #line 214 "html_unescape.gperf" {"nabla", 3, { 0xE2, 0x88, 0x87 }}, #line 10 "html_unescape.gperf" {"lt", 1, { 0x3C }}, #line 83 "html_unescape.gperf" {"ccedil", 2, { 0xC3, 0xA7 }}, {(char*)0}, #line 51 "html_unescape.gperf" {"Ccedil", 2, { 0xC3, 0x87 }}, {(char*)0}, #line 240 "html_unescape.gperf" {"sup", 3, { 0xE2, 0x8A, 0x83 }}, #line 241 "html_unescape.gperf" {"nsub", 3, { 0xE2, 0x8A, 0x84 }}, #line 189 "html_unescape.gperf" {"lsaquo", 3, { 0xE2, 0x80, 0xB9 }}, #line 243 "html_unescape.gperf" {"supe", 3, { 0xE2, 0x8A, 0x87 }}, {(char*)0}, {(char*)0}, {(char*)0}, #line 34 "html_unescape.gperf" {"para", 2, { 0xC2, 0xB6 }}, #line 154 "html_unescape.gperf" {"omicron", 2, { 0xCE, 0xBF }}, #line 229 "html_unescape.gperf" {"cup", 3, { 0xE2, 0x88, 0xAA }}, {(char*)0}, #line 211 "html_unescape.gperf" {"part", 3, { 0xE2, 0x88, 0x82 }}, #line 37 "html_unescape.gperf" {"sup1", 2, { 0xC2, 0xB9 }}, #line 47 "html_unescape.gperf" {"Atilde", 2, { 0xC3, 0x83 }}, #line 140 "html_unescape.gperf" {"alpha", 2, { 0xCE, 0xB1 }}, #line 127 "html_unescape.gperf" {"Mu", 2, { 0xCE, 0x9C }}, #line 103 "html_unescape.gperf" {"ucirc", 2, { 0xC3, 0xBB }}, #line 139 "html_unescape.gperf" {"Omega", 2, { 0xCE, 0xA9 }}, #line 191 "html_unescape.gperf" {"oline", 3, { 0xE2, 0x80, 0xBE }}, #line 128 "html_unescape.gperf" {"Nu", 2, { 0xCE, 0x9D }}, #line 102 "html_unescape.gperf" {"uacute", 2, { 0xC3, 0xBA }}, #line 190 "html_unescape.gperf" {"rsaquo", 3, { 0xE2, 0x80, 0xBA }}, #line 159 "html_unescape.gperf" {"tau", 2, { 0xCF, 0x84 }}, {(char*)0}, #line 31 "html_unescape.gperf" {"sup3", 2, { 0xC2, 0xB3 }}, #line 165 "html_unescape.gperf" {"thetasym", 2, { 0xCF, 0x91 }}, #line 152 "html_unescape.gperf" {"nu", 2, { 0xCE, 0xBD }}, #line 46 "html_unescape.gperf" {"Acirc", 2, { 0xC3, 0x82 }}, #line 38 "html_unescape.gperf" {"ordm", 2, { 0xC2, 0xBA }}, #line 30 "html_unescape.gperf" {"sup2", 2, { 0xC2, 0xB2 }}, #line 242 "html_unescape.gperf" {"sube", 3, { 0xE2, 0x8A, 0x86 }}, #line 45 "html_unescape.gperf" {"Aacute", 2, { 0xC3, 0x81 }}, #line 134 "html_unescape.gperf" {"Tau", 2, { 0xCE, 0xA4 }}, #line 124 "html_unescape.gperf" {"Iota", 2, { 0xCE, 0x99 }}, {(char*)0}, {(char*)0}, {(char*)0}, #line 248 "html_unescape.gperf" {"lceil", 3, { 0xE2, 0x8C, 0x88 }}, #line 20 "html_unescape.gperf" {"uml", 2, { 0xC2, 0xA8 }}, #line 43 "html_unescape.gperf" {"iquest", 2, { 0xC2, 0xBF }}, #line 121 "html_unescape.gperf" {"Zeta", 2, { 0xCE, 0x96 }}, #line 193 "html_unescape.gperf" {"euro", 3, { 0xE2, 0x82, 0xAC }}, #line 234 "html_unescape.gperf" {"asymp", 3, { 0xE2, 0x89, 0x88 }}, #line 235 "html_unescape.gperf" {"ne", 3, { 0xE2, 0x89, 0xA0 }}, #line 65 "html_unescape.gperf" {"Otilde", 2, { 0xC3, 0x95 }}, #line 122 "html_unescape.gperf" {"Eta", 2, { 0xCE, 0x97 }}, #line 11 "html_unescape.gperf" {"gt", 1, { 0x3E }}, #line 233 "html_unescape.gperf" {"cong", 3, { 0xE2, 0x89, 0x85 }}, #line 136 "html_unescape.gperf" {"Phi", 2, { 0xCE, 0xA6 }}, #line 145 "html_unescape.gperf" {"zeta", 2, { 0xCE, 0xB6 }}, #line 138 "html_unescape.gperf" {"Psi", 2, { 0xCE, 0xA8 }}, #line 212 "html_unescape.gperf" {"exist", 3, { 0xE2, 0x88, 0x83 }}, #line 22 "html_unescape.gperf" {"ordf", 2, { 0xC2, 0xAA }}, #line 126 "html_unescape.gperf" {"Lambda", 2, { 0xCE, 0x9B }}, #line 239 "html_unescape.gperf" {"sub", 3, { 0xE2, 0x8A, 0x82 }}, #line 118 "html_unescape.gperf" {"Gamma", 2, { 0xCE, 0x93 }}, #line 249 "html_unescape.gperf" {"rceil", 3, { 0xE2, 0x8C, 0x89 }}, #line 116 "html_unescape.gperf" {"Alpha", 2, { 0xCE, 0x91 }}, #line 64 "html_unescape.gperf" {"Ocirc", 2, { 0xC3, 0x94 }}, #line 21 "html_unescape.gperf" {"copy", 2, { 0xC2, 0xA9 }}, #line 224 "html_unescape.gperf" {"infin", 3, { 0xE2, 0x88, 0x9E }}, #line 222 "html_unescape.gperf" {"radic", 3, { 0xE2, 0x88, 0x9A }}, #line 63 "html_unescape.gperf" {"Oacute", 2, { 0xC3, 0x93 }}, #line 202 "html_unescape.gperf" {"darr", 3, { 0xE2, 0x86, 0x93 }}, #line 40 "html_unescape.gperf" {"frac14", 2, { 0xC2, 0xBC }}, #line 188 "html_unescape.gperf" {"Prime", 3, { 0xE2, 0x80, 0xB3 }}, #line 141 "html_unescape.gperf" {"beta", 2, { 0xCE, 0xB2 }}, #line 237 "html_unescape.gperf" {"le", 3, { 0xE2, 0x89, 0xA4 }}, #line 71 "html_unescape.gperf" {"Ucirc", 2, { 0xC3, 0x9B }}, #line 54 "html_unescape.gperf" {"Ecirc", 2, { 0xC3, 0x8A }}, {(char*)0}, {(char*)0}, #line 70 "html_unescape.gperf" {"Uacute", 2, { 0xC3, 0x9A }}, #line 53 "html_unescape.gperf" {"Eacute", 2, { 0xC3, 0x89 }}, #line 14 "html_unescape.gperf" {"cent", 2, { 0xC2, 0xA2 }}, #line 143 "html_unescape.gperf" {"delta", 2, { 0xCE, 0xB4 }}, #line 42 "html_unescape.gperf" {"frac34", 2, { 0xC2, 0xBE }}, #line 204 "html_unescape.gperf" {"crarr", 3, { 0xE2, 0x86, 0xB5 }}, #line 150 "html_unescape.gperf" {"lambda", 2, { 0xCE, 0xBB }}, #line 19 "html_unescape.gperf" {"sect", 2, { 0xC2, 0xA7 }}, #line 16 "html_unescape.gperf" {"curren", 2, { 0xC2, 0xA4 }}, {(char*)0}, #line 184 "html_unescape.gperf" {"bull", 3, { 0xE2, 0x80, 0xA2 }}, #line 130 "html_unescape.gperf" {"Omicron", 2, { 0xCE, 0x9F }}, #line 252 "html_unescape.gperf" {"lang", 3, { 0xE2, 0x9F, 0xA8 }}, #line 58 "html_unescape.gperf" {"Icirc", 2, { 0xC3, 0x8E }}, #line 18 "html_unescape.gperf" {"brvbar", 2, { 0xC2, 0xA6 }}, #line 246 "html_unescape.gperf" {"perp", 3, { 0xE2, 0x8A, 0xA5 }}, #line 119 "html_unescape.gperf" {"Delta", 2, { 0xCE, 0x94 }}, #line 57 "html_unescape.gperf" {"Iacute", 2, { 0xC3, 0x8D }}, {(char*)0}, {(char*)0}, #line 41 "html_unescape.gperf" {"frac12", 2, { 0xC2, 0xBD }}, #line 200 "html_unescape.gperf" {"uarr", 3, { 0xE2, 0x86, 0x91 }}, {(char*)0}, #line 199 "html_unescape.gperf" {"larr", 3, { 0xE2, 0x86, 0x90 }}, #line 105 "html_unescape.gperf" {"yacute", 2, { 0xC3, 0xBD }}, #line 225 "html_unescape.gperf" {"ang", 3, { 0xE2, 0x88, 0xA0 }}, {(char*)0}, #line 151 "html_unescape.gperf" {"mu", 2, { 0xCE, 0xBC }}, #line 182 "html_unescape.gperf" {"dagger", 3, { 0xE2, 0x80, 0xA0 }}, #line 256 "html_unescape.gperf" {"clubs", 3, { 0xE2, 0x99, 0xA3 }}, #line 195 "html_unescape.gperf" {"weierp", 3, { 0xE2, 0x84, 0x98 }}, #line 253 "html_unescape.gperf" {"rang", 3, { 0xE2, 0x9F, 0xA9 }}, #line 98 "html_unescape.gperf" {"ouml", 2, { 0xC3, 0xB6 }}, #line 80 "html_unescape.gperf" {"auml", 2, { 0xC3, 0xA4 }}, #line 87 "html_unescape.gperf" {"euml", 2, { 0xC3, 0xAB }}, #line 91 "html_unescape.gperf" {"iuml", 2, { 0xC3, 0xAF }}, #line 112 "html_unescape.gperf" {"Yuml", 2, { 0xC5, 0xB8 }}, #line 60 "html_unescape.gperf" {"ETH", 2, { 0xC3, 0x90 }}, {(char*)0}, {(char*)0}, #line 13 "html_unescape.gperf" {"iexcl", 2, { 0xC2, 0xA1 }}, #line 183 "html_unescape.gperf" {"Dagger", 3, { 0xE2, 0x80, 0xA1 }}, #line 201 "html_unescape.gperf" {"rarr", 3, { 0xE2, 0x86, 0x92 }}, #line 27 "html_unescape.gperf" {"macr", 2, { 0xC2, 0xAF }}, #line 198 "html_unescape.gperf" {"alefsym", 3, { 0xE2, 0x84, 0xB5 }}, {(char*)0}, #line 238 "html_unescape.gperf" {"ge", 3, { 0xE2, 0x89, 0xA5 }}, #line 81 "html_unescape.gperf" {"aring", 2, { 0xC3, 0xA5 }}, {(char*)0}, #line 155 "html_unescape.gperf" {"pi", 2, { 0xCF, 0x80 }}, #line 192 "html_unescape.gperf" {"frasl", 3, { 0xE2, 0x81, 0x84 }}, #line 196 "html_unescape.gperf" {"real", 3, { 0xE2, 0x84, 0x9C }}, #line 100 "html_unescape.gperf" {"oslash", 2, { 0xC3, 0xB8 }}, #line 153 "html_unescape.gperf" {"xi", 2, { 0xCE, 0xBE }}, #line 142 "html_unescape.gperf" {"gamma", 2, { 0xCE, 0xB3 }}, #line 74 "html_unescape.gperf" {"THORN", 2, { 0xC3, 0x9E }}, #line 186 "html_unescape.gperf" {"permil", 3, { 0xE2, 0x80, 0xB0 }}, #line 129 "html_unescape.gperf" {"Xi", 2, { 0xCE, 0x9E }}, #line 9 "html_unescape.gperf" {"apos", 1, { 0x27 }}, {(char*)0}, #line 217 "html_unescape.gperf" {"ni", 3, { 0xE2, 0x88, 0x8B }}, #line 36 "html_unescape.gperf" {"cedil", 2, { 0xC2, 0xB8 }}, {(char*)0}, {(char*)0}, #line 236 "html_unescape.gperf" {"equiv", 3, { 0xE2, 0x89, 0xA1 }}, #line 104 "html_unescape.gperf" {"uuml", 2, { 0xC3, 0xBC }}, #line 250 "html_unescape.gperf" {"lfloor", 3, { 0xE2, 0x8C, 0x8A }}, {(char*)0}, #line 29 "html_unescape.gperf" {"plusmn", 2, { 0xC2, 0xB1 }}, #line 113 "html_unescape.gperf" {"fnof", 2, { 0xC6, 0x92 }}, {(char*)0}, #line 172 "html_unescape.gperf" {"lrm", 3, { 0xE2, 0x80, 0x8E }}, #line 181 "html_unescape.gperf" {"bdquo", 3, { 0xE2, 0x80, 0x9E }}, {(char*)0}, #line 170 "html_unescape.gperf" {"zwnj", 3, { 0xE2, 0x80, 0x8C }}, {(char*)0}, #line 48 "html_unescape.gperf" {"Auml", 2, { 0xC3, 0x84 }}, {(char*)0}, #line 75 "html_unescape.gperf" {"szlig", 2, { 0xC3, 0x9F }}, #line 255 "html_unescape.gperf" {"spades", 3, { 0xE2, 0x99, 0xA0 }}, #line 179 "html_unescape.gperf" {"ldquo", 3, { 0xE2, 0x80, 0x9C }}, {(char*)0}, {(char*)0}, #line 176 "html_unescape.gperf" {"lsquo", 3, { 0xE2, 0x80, 0x98 }}, #line 178 "html_unescape.gperf" {"sbquo", 3, { 0xE2, 0x80, 0x9A }}, #line 251 "html_unescape.gperf" {"rfloor", 3, { 0xE2, 0x8C, 0x8B }}, #line 213 "html_unescape.gperf" {"empty", 3, { 0xE2, 0x88, 0x85 }}, #line 258 "html_unescape.gperf" {"diams", 3, { 0xE2, 0x99, 0xA6 }}, #line 210 "html_unescape.gperf" {"forall", 3, { 0xE2, 0x88, 0x80 }}, #line 25 "html_unescape.gperf" {"shy", 2, { 0xC2, 0xAD }}, #line 49 "html_unescape.gperf" {"Aring", 2, { 0xC3, 0x85 }}, #line 17 "html_unescape.gperf" {"yen", 2, { 0xC2, 0xA5 }}, {(char*)0}, {(char*)0}, {(char*)0}, #line 115 "html_unescape.gperf" {"tilde", 2, { 0xCB, 0x9C }}, #line 219 "html_unescape.gperf" {"sum", 3, { 0xE2, 0x88, 0x91 }}, {(char*)0}, {(char*)0}, #line 180 "html_unescape.gperf" {"rdquo", 3, { 0xE2, 0x80, 0x9D }}, #line 208 "html_unescape.gperf" {"dArr", 3, { 0xE2, 0x87, 0x93 }}, {(char*)0}, #line 177 "html_unescape.gperf" {"rsquo", 3, { 0xE2, 0x80, 0x99 }}, #line 117 "html_unescape.gperf" {"Beta", 2, { 0xCE, 0x92 }}, {(char*)0}, #line 23 "html_unescape.gperf" {"laquo", 2, { 0xC2, 0xAB }}, #line 7 "html_unescape.gperf" {"quot", 1, { 0x22 }}, #line 66 "html_unescape.gperf" {"Ouml", 2, { 0xC3, 0x96 }}, #line 50 "html_unescape.gperf" {"AElig", 2, { 0xC3, 0x86 }}, #line 144 "html_unescape.gperf" {"epsilon", 2, { 0xCE, 0xB5 }}, #line 244 "html_unescape.gperf" {"oplus", 3, { 0xE2, 0x8A, 0x95 }}, {(char*)0}, #line 94 "html_unescape.gperf" {"ograve", 2, { 0xC3, 0xB2 }}, #line 76 "html_unescape.gperf" {"agrave", 2, { 0xC3, 0xA0 }}, #line 84 "html_unescape.gperf" {"egrave", 2, { 0xC3, 0xA8 }}, #line 88 "html_unescape.gperf" {"igrave", 2, { 0xC3, 0xAC }}, {(char*)0}, #line 72 "html_unescape.gperf" {"Uuml", 2, { 0xC3, 0x9C }}, #line 55 "html_unescape.gperf" {"Euml", 2, { 0xC3, 0x8B }}, {(char*)0}, #line 99 "html_unescape.gperf" {"divide", 2, { 0xC3, 0xB7 }}, {(char*)0}, {(char*)0}, {(char*)0}, #line 39 "html_unescape.gperf" {"raquo", 2, { 0xC2, 0xBB }}, {(char*)0}, #line 257 "html_unescape.gperf" {"hearts", 3, { 0xE2, 0x99, 0xA5 }}, #line 68 "html_unescape.gperf" {"Oslash", 2, { 0xC3, 0x98 }}, #line 203 "html_unescape.gperf" {"harr", 3, { 0xE2, 0x86, 0x94 }}, {(char*)0}, #line 206 "html_unescape.gperf" {"uArr", 3, { 0xE2, 0x87, 0x91 }}, {(char*)0}, #line 205 "html_unescape.gperf" {"lArr", 3, { 0xE2, 0x87, 0x90 }}, {(char*)0}, #line 59 "html_unescape.gperf" {"Iuml", 2, { 0xC3, 0x8F }}, #line 28 "html_unescape.gperf" {"deg", 2, { 0xC2, 0xB0 }}, {(char*)0}, {(char*)0}, {(char*)0}, #line 108 "html_unescape.gperf" {"OElig", 2, { 0xC5, 0x92 }}, #line 160 "html_unescape.gperf" {"upsilon", 2, { 0xCF, 0x85 }}, #line 107 "html_unescape.gperf" {"yuml", 2, { 0xC3, 0xBF }}, #line 185 "html_unescape.gperf" {"hellip", 3, { 0xE2, 0x80, 0xA6 }}, {(char*)0}, #line 35 "html_unescape.gperf" {"middot", 2, { 0xC2, 0xB7 }}, #line 101 "html_unescape.gperf" {"ugrave", 2, { 0xC3, 0xB9 }}, {(char*)0}, #line 133 "html_unescape.gperf" {"Sigma", 2, { 0xCE, 0xA3 }}, {(char*)0}, #line 174 "html_unescape.gperf" {"ndash", 3, { 0xE2, 0x80, 0x93 }}, {(char*)0}, #line 207 "html_unescape.gperf" {"rArr", 3, { 0xE2, 0x87, 0x92 }}, {(char*)0}, #line 114 "html_unescape.gperf" {"circ", 2, { 0xCB, 0x86 }}, {(char*)0}, #line 158 "html_unescape.gperf" {"sigma", 2, { 0xCF, 0x83 }}, #line 44 "html_unescape.gperf" {"Agrave", 2, { 0xC3, 0x80 }}, {(char*)0}, #line 173 "html_unescape.gperf" {"rlm", 3, { 0xE2, 0x80, 0x8F }}, {(char*)0}, #line 33 "html_unescape.gperf" {"micro", 2, { 0xC2, 0xB5 }}, {(char*)0}, {(char*)0}, {(char*)0}, #line 131 "html_unescape.gperf" {"Pi", 2, { 0xCE, 0xA0 }}, {(char*)0}, {(char*)0}, #line 92 "html_unescape.gperf" {"eth", 2, { 0xC3, 0xB0 }}, #line 166 "html_unescape.gperf" {"piv", 2, { 0xCF, 0x96 }}, {(char*)0}, #line 109 "html_unescape.gperf" {"oelig", 2, { 0xC5, 0x93 }}, #line 82 "html_unescape.gperf" {"aelig", 2, { 0xC3, 0xA6 }}, #line 67 "html_unescape.gperf" {"times", 2, { 0xC3, 0x97 }}, {(char*)0}, {(char*)0}, {(char*)0}, {(char*)0}, {(char*)0}, #line 220 "html_unescape.gperf" {"minus", 3, { 0xE2, 0x88, 0x92 }}, {(char*)0}, {(char*)0}, {(char*)0}, {(char*)0}, {(char*)0}, {(char*)0}, {(char*)0}, #line 26 "html_unescape.gperf" {"reg", 2, { 0xC2, 0xAE }}, #line 62 "html_unescape.gperf" {"Ograve", 2, { 0xC3, 0x92 }}, {(char*)0}, {(char*)0}, {(char*)0}, {(char*)0}, #line 135 "html_unescape.gperf" {"Upsilon", 2, { 0xCE, 0xA5 }}, #line 120 "html_unescape.gperf" {"Epsilon", 2, { 0xCE, 0x95 }}, {(char*)0}, {(char*)0}, {(char*)0}, #line 69 "html_unescape.gperf" {"Ugrave", 2, { 0xC3, 0x99 }}, #line 52 "html_unescape.gperf" {"Egrave", 2, { 0xC3, 0x88 }}, {(char*)0}, {(char*)0}, {(char*)0}, {(char*)0}, {(char*)0}, {(char*)0}, {(char*)0}, {(char*)0}, {(char*)0}, {(char*)0}, {(char*)0}, {(char*)0}, {(char*)0}, {(char*)0}, {(char*)0}, #line 56 "html_unescape.gperf" {"Igrave", 2, { 0xC3, 0x8C }}, {(char*)0}, {(char*)0}, {(char*)0}, {(char*)0}, {(char*)0}, {(char*)0}, {(char*)0}, #line 175 "html_unescape.gperf" {"mdash", 3, { 0xE2, 0x80, 0x94 }}, {(char*)0}, {(char*)0}, {(char*)0}, {(char*)0}, {(char*)0}, {(char*)0}, {(char*)0}, {(char*)0}, {(char*)0}, #line 232 "html_unescape.gperf" {"sim", 3, { 0xE2, 0x88, 0xBC }}, {(char*)0}, {(char*)0}, {(char*)0}, {(char*)0}, #line 209 "html_unescape.gperf" {"hArr", 3, { 0xE2, 0x87, 0x94 }}, {(char*)0}, {(char*)0}, {(char*)0}, {(char*)0}, {(char*)0}, {(char*)0}, {(char*)0}, {(char*)0}, {(char*)0}, {(char*)0}, {(char*)0}, {(char*)0}, {(char*)0}, {(char*)0}, {(char*)0}, {(char*)0}, {(char*)0}, {(char*)0}, {(char*)0}, {(char*)0}, #line 171 "html_unescape.gperf" {"zwj", 3, { 0xE2, 0x80, 0x8D }}, {(char*)0}, {(char*)0}, {(char*)0}, {(char*)0}, #line 157 "html_unescape.gperf" {"sigmaf", 2, { 0xCF, 0x82 }} }; if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH) { register int key = hash_entity (str, len); if (key <= MAX_HASH_VALUE && key >= 0) if (len == lengthtable[key]) { register const char *s = wordlist[key].entity; if (s && *str == *s && !memcmp (str + 1, s + 1, len - 1)) return &wordlist[key]; } } return 0; } escape-utils-0.2.4/ext/escape_utils/buffer.h0000644000175000017500000000573511772525275020350 0ustar tfheentfheen/* * Copyright (c) 2008, Natacha Porté * Copyright (c) 2011, Vicent Martí * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ #ifndef __GEN_BUFFER_H__ #define __GEN_BUFFER_H__ #include #include #include #if defined(_MSC_VER) #define __attribute__(x) #define inline #endif typedef enum { BUF_OK = 0, BUF_ENOMEM = -1, } buferror_t; /* struct buf: character array buffer */ struct buf { uint8_t *data; /* actual character data */ size_t size; /* size of the string */ size_t asize; /* allocated size (0 = volatile buffer) */ size_t unit; /* reallocation unit size (0 = read-only buffer) */ }; /* CONST_BUF: global buffer from a string litteral */ #define BUF_STATIC(string) \ { (uint8_t *)string, sizeof string -1, sizeof string, 0, 0 } /* VOLATILE_BUF: macro for creating a volatile buffer on the stack */ #define BUF_VOLATILE(strname) \ { (uint8_t *)strname, strlen(strname), 0, 0, 0 } /* BUFPUTSL: optimized bufputs of a string litteral */ #define BUFPUTSL(output, literal) \ bufput(output, literal, sizeof literal - 1) /* bufgrow: increasing the allocated size to the given value */ int bufgrow(struct buf *, size_t); /* bufnew: allocation of a new buffer */ struct buf *bufnew(size_t) __attribute__ ((malloc)); /* bufnullterm: NUL-termination of the string array (making a C-string) */ const char *bufcstr(struct buf *); /* bufprefix: compare the beginning of a buffer with a string */ int bufprefix(const struct buf *buf, const char *prefix); /* bufput: appends raw data to a buffer */ void bufput(struct buf *, const void *, size_t); /* bufputs: appends a NUL-terminated string to a buffer */ void bufputs(struct buf *, const char *); /* bufputc: appends a single char to a buffer */ void bufputc(struct buf *, int); /* bufrelease: decrease the reference count and free the buffer if needed */ void bufrelease(struct buf *); /* bufreset: frees internal data of the buffer */ void bufreset(struct buf *); /* bufslurp: removes a given number of bytes from the head of the array */ void bufslurp(struct buf *, size_t); /* bufprintf: formatted printing to a buffer */ void bufprintf(struct buf *, const char *, ...) __attribute__ ((format (printf, 2, 3))); /* vbufprintf: stdarg variant of formatted printing into a buffer */ void vbufprintf(struct buf *, const char * , va_list); #endif escape-utils-0.2.4/Gemfile0000644000175000017500000000003111772525275014721 0ustar tfheentfheensource :rubygems gemspecescape-utils-0.2.4/lib/0000755000175000017500000000000011772525275014202 5ustar tfheentfheenescape-utils-0.2.4/lib/escape_utils/0000755000175000017500000000000011772525275016662 5ustar tfheentfheenescape-utils-0.2.4/lib/escape_utils/version.rb0000644000175000017500000000005211772525275020671 0ustar tfheentfheenmodule EscapeUtils VERSION = "0.2.4" endescape-utils-0.2.4/lib/escape_utils/url/0000755000175000017500000000000011772525275017464 5ustar tfheentfheenescape-utils-0.2.4/lib/escape_utils/url/cgi.rb0000644000175000017500000000021511772525275020551 0ustar tfheentfheenclass CGI def self.escape(s) EscapeUtils.escape_url(s.to_s) end def self.unescape(s) EscapeUtils.unescape_url(s.to_s) end endescape-utils-0.2.4/lib/escape_utils/url/rack.rb0000644000175000017500000000035111772525275020730 0ustar tfheentfheenmodule Rack module Utils def escape(url) EscapeUtils.escape_url(url.to_s) end def unescape(url) EscapeUtils.unescape_url(url.to_s) end module_function :escape module_function :unescape end end escape-utils-0.2.4/lib/escape_utils/url/erb.rb0000644000175000017500000000026211772525275020561 0ustar tfheentfheenclass ERB module Util def url_encode(s) EscapeUtils.escape_url(s.to_s) end alias u url_encode module_function :u module_function :url_encode end endescape-utils-0.2.4/lib/escape_utils/url/uri.rb0000644000175000017500000000023211772525275020605 0ustar tfheentfheenmodule URI def self.escape(s, unsafe=nil) EscapeUtils.escape_uri(s.to_s) end def self.unescape(s) EscapeUtils.unescape_uri(s.to_s) end endescape-utils-0.2.4/lib/escape_utils/javascript/0000755000175000017500000000000011772525275021030 5ustar tfheentfheenescape-utils-0.2.4/lib/escape_utils/javascript/action_view.rb0000644000175000017500000000025011772525275023661 0ustar tfheentfheenmodule ActionView module Helpers module JavaScriptHelper def escape_javascript(s) EscapeUtils.escape_javascript(s.to_s) end end end end escape-utils-0.2.4/lib/escape_utils/html/0000755000175000017500000000000011772525275017626 5ustar tfheentfheenescape-utils-0.2.4/lib/escape_utils/html/cgi.rb0000644000175000017500000000026211772525275020715 0ustar tfheentfheenclass CGI extend ::EscapeUtils::HtmlSafety class << self alias escapeHTML _escape_html def unescapeHTML(s) EscapeUtils.unescape_html(s.to_s) end end endescape-utils-0.2.4/lib/escape_utils/html/haml.rb0000644000175000017500000000016011772525275021071 0ustar tfheentfheenmodule Haml module Helpers include ::EscapeUtils::HtmlSafety alias html_escape _escape_html end endescape-utils-0.2.4/lib/escape_utils/html/rack.rb0000644000175000017500000000022011772525275021065 0ustar tfheentfheenmodule Rack module Utils include ::EscapeUtils::HtmlSafety alias escape_html _escape_html module_function :escape_html end end escape-utils-0.2.4/lib/escape_utils/html/erb.rb0000644000175000017500000000027311772525275020725 0ustar tfheentfheenclass ERB module Util include ::EscapeUtils::HtmlSafety alias html_escape _escape_html alias h html_escape module_function :h module_function :html_escape end endescape-utils-0.2.4/lib/escape_utils/html_safety.rb0000644000175000017500000000052411772525275021527 0ustar tfheentfheenmodule EscapeUtils module HtmlSafety if "".respond_to? :html_safe? def _escape_html(s) if s.html_safe? s.to_s.html_safe else EscapeUtils.escape_html(s.to_s).html_safe end end else def _escape_html(s) EscapeUtils.escape_html(s.to_s) end end end end escape-utils-0.2.4/lib/escape_utils.rb0000644000175000017500000000116111772525275017206 0ustar tfheentfheenrequire 'escape_utils/escape_utils' require 'escape_utils/version' unless defined? EscapeUtils::VERSION module EscapeUtils extend self # turn on/off the escaping of the '/' character during HTML escaping # Escaping '/' is recommended by the OWASP - http://www.owasp.org/index.php/XSS_(Cross_Site_Scripting)_Prevention_Cheat_Sheet#RULE_.231_-_HTML_Escape_Before_Inserting_Untrusted_Data_into_HTML_Element_Content # This is because quotes around HTML attributes are optional in most/all modern browsers at the time of writing (10/15/2010) @@html_secure = true autoload :HtmlSafety, 'escape_utils/html_safety' endescape-utils-0.2.4/escape_utils.gemspec0000644000175000017500000000175011772525275017464 0ustar tfheentfheenrequire './lib/escape_utils/version' unless defined? EscapeUtils::VERSION Gem::Specification.new do |s| s.name = %q{escape_utils} s.version = EscapeUtils::VERSION s.authors = ["Brian Lopez"] s.date = Time.now.utc.strftime("%Y-%m-%d") s.email = %q{seniorlopez@gmail.com} s.extensions = ["ext/escape_utils/extconf.rb"] s.files = `git ls-files`.split("\n") s.homepage = %q{http://github.com/brianmario/escape_utils} s.rdoc_options = ["--charset=UTF-8"] s.require_paths = ["lib", "ext"] s.rubygems_version = %q{1.4.2} s.summary = %q{Faster string escaping routines for your web apps} s.test_files = `git ls-files spec`.split("\n") # tests s.add_development_dependency 'rake-compiler', ">= 0.7.5" s.add_development_dependency 'rspec', ">= 2.0.0" # benchmarks s.add_development_dependency 'rack' s.add_development_dependency 'haml' s.add_development_dependency 'fast_xs' s.add_development_dependency 'actionpack' s.add_development_dependency 'url_escape' end