rails-html-sanitizer-1.0.4/0000755000004100000410000000000013255032721015626 5ustar www-datawww-datarails-html-sanitizer-1.0.4/rails-html-sanitizer.gemspec0000644000004100000410000000414713255032721023263 0ustar www-datawww-data######################################################### # This file has been automatically generated by gem2tgz # ######################################################### # -*- encoding: utf-8 -*- Gem::Specification.new do |s| s.name = "rails-html-sanitizer" s.version = "1.0.4" s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version= s.authors = ["Rafael Mendon\u{e7}a Fran\u{e7}a", "Kasper Timm Hansen"] s.date = "2018-03-22" s.description = "HTML sanitization for Rails applications" s.email = ["rafaelmfranca@gmail.com", "kaspth@gmail.com"] s.files = ["CHANGELOG.md", "MIT-LICENSE", "README.md", "lib/rails-html-sanitizer.rb", "lib/rails/html/sanitizer.rb", "lib/rails/html/sanitizer/version.rb", "lib/rails/html/scrubbers.rb", "test/sanitizer_test.rb", "test/scrubbers_test.rb"] s.homepage = "https://github.com/rails/rails-html-sanitizer" s.licenses = ["MIT"] s.require_paths = ["lib"] s.rubygems_version = "1.8.23" s.summary = "This gem is responsible to sanitize HTML fragments in Rails applications." s.test_files = ["test/sanitizer_test.rb", "test/scrubbers_test.rb"] if s.respond_to? :specification_version then s.specification_version = 4 if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then s.add_development_dependency(%q, ["~> 1.3"]) s.add_runtime_dependency(%q, [">= 2.2.2", "~> 2.2"]) s.add_development_dependency(%q, [">= 0"]) s.add_development_dependency(%q, [">= 0"]) s.add_development_dependency(%q, [">= 0"]) else s.add_dependency(%q, ["~> 1.3"]) s.add_dependency(%q, [">= 2.2.2", "~> 2.2"]) s.add_dependency(%q, [">= 0"]) s.add_dependency(%q, [">= 0"]) s.add_dependency(%q, [">= 0"]) end else s.add_dependency(%q, ["~> 1.3"]) s.add_dependency(%q, [">= 2.2.2", "~> 2.2"]) s.add_dependency(%q, [">= 0"]) s.add_dependency(%q, [">= 0"]) s.add_dependency(%q, [">= 0"]) end end rails-html-sanitizer-1.0.4/MIT-LICENSE0000644000004100000410000000212313255032720017257 0ustar www-datawww-dataCopyright (c) 2013-2015 Rafael Mendonça França, Kasper Timm Hansen MIT License Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. rails-html-sanitizer-1.0.4/lib/0000755000004100000410000000000013255032720016373 5ustar www-datawww-datarails-html-sanitizer-1.0.4/lib/rails-html-sanitizer.rb0000644000004100000410000000411513255032720023003 0ustar www-datawww-datarequire "rails/html/sanitizer/version" require "loofah" require "rails/html/scrubbers" require "rails/html/sanitizer" module Rails module Html class Sanitizer class << self def full_sanitizer Html::FullSanitizer end def link_sanitizer Html::LinkSanitizer end def white_list_sanitizer Html::WhiteListSanitizer end end end end end module ActionView module Helpers module SanitizeHelper module ClassMethods # Replaces the allowed tags for the +sanitize+ helper. # # class Application < Rails::Application # config.action_view.sanitized_allowed_tags = 'table', 'tr', 'td' # end # def sanitized_allowed_tags=(tags) sanitizer_vendor.white_list_sanitizer.allowed_tags = tags end # Replaces the allowed HTML attributes for the +sanitize+ helper. # # class Application < Rails::Application # config.action_view.sanitized_allowed_attributes = ['onclick', 'longdesc'] # end # def sanitized_allowed_attributes=(attributes) sanitizer_vendor.white_list_sanitizer.allowed_attributes = attributes end [:protocol_separator, :uri_attributes, :bad_tags, :allowed_css_properties, :allowed_css_keywords, :shorthand_css_properties, :allowed_protocols].each do |meth| meth_name = "sanitized_#{meth}" define_method(meth_name) { deprecate_option(meth_name) } define_method("#{meth_name}=") { |_| deprecate_option("#{meth_name}=") } end private def deprecate_option(name) ActiveSupport::Deprecation.warn "The #{name} option is deprecated " \ "and has no effect. Until Rails 5 the old behavior can still be " \ "installed. To do this add the `rails-deprecated-sanitizer` to " \ "your Gemfile. Consult the Rails 4.2 upgrade guide for more information." end end end end end rails-html-sanitizer-1.0.4/lib/rails/0000755000004100000410000000000013255032720017505 5ustar www-datawww-datarails-html-sanitizer-1.0.4/lib/rails/html/0000755000004100000410000000000013255032720020451 5ustar www-datawww-datarails-html-sanitizer-1.0.4/lib/rails/html/scrubbers.rb0000644000004100000410000001376713255032720023006 0ustar www-datawww-datamodule Rails module Html # === Rails::Html::PermitScrubber # # Rails::Html::PermitScrubber allows you to permit only your own tags and/or attributes. # # Rails::Html::PermitScrubber can be subclassed to determine: # - When a node should be skipped via +skip_node?+. # - When a node is allowed via +allowed_node?+. # - When an attribute should be scrubbed via +scrub_attribute?+. # # Subclasses don't need to worry if tags or attributes are set or not. # If tags or attributes are not set, Loofah's behavior will be used. # If you override +allowed_node?+ and no tags are set, it will not be called. # Instead Loofahs behavior will be used. # Likewise for +scrub_attribute?+ and attributes respectively. # # Text and CDATA nodes are skipped by default. # Unallowed elements will be stripped, i.e. element is removed but its subtree kept. # Supplied tags and attributes should be Enumerables. # # +tags=+ # If set, elements excluded will be stripped. # If not, elements are stripped based on Loofahs +HTML5::Scrub.allowed_element?+. # # +attributes=+ # If set, attributes excluded will be removed. # If not, attributes are removed based on Loofahs +HTML5::Scrub.scrub_attributes+. # # class CommentScrubber < Html::PermitScrubber # def initialize # super # self.tags = %w(form script comment blockquote) # end # # def skip_node?(node) # node.text? # end # # def scrub_attribute?(name) # name == "style" # end # end # # See the documentation for Nokogiri::XML::Node to understand what's possible # with nodes: http://nokogiri.org/Nokogiri/XML/Node.html class PermitScrubber < Loofah::Scrubber attr_reader :tags, :attributes def initialize @direction = :bottom_up @tags, @attributes = nil, nil end def tags=(tags) @tags = validate!(tags, :tags) end def attributes=(attributes) @attributes = validate!(attributes, :attributes) end def scrub(node) if node.cdata? text = node.document.create_text_node node.text node.replace text return CONTINUE end return CONTINUE if skip_node?(node) unless keep_node?(node) return STOP if scrub_node(node) == STOP end scrub_attributes(node) end protected def allowed_node?(node) @tags.include?(node.name) end def skip_node?(node) node.text? end def scrub_attribute?(name) !@attributes.include?(name) end def keep_node?(node) if @tags allowed_node?(node) else Loofah::HTML5::Scrub.allowed_element?(node.name) end end def scrub_node(node) node.before(node.children) # strip node.remove end def scrub_attributes(node) if @attributes node.attribute_nodes.each do |attr| attr.remove if scrub_attribute?(attr.name) scrub_attribute(node, attr) end scrub_css_attribute(node) else Loofah::HTML5::Scrub.scrub_attributes(node) end end def scrub_css_attribute(node) if Loofah::HTML5::Scrub.respond_to?(:scrub_css_attribute) Loofah::HTML5::Scrub.scrub_css_attribute(node) else style = node.attributes['style'] style.value = Loofah::HTML5::Scrub.scrub_css(style.value) if style end end def validate!(var, name) if var && !var.is_a?(Enumerable) raise ArgumentError, "You should pass :#{name} as an Enumerable" end var end def scrub_attribute(node, attr_node) attr_name = if attr_node.namespace "#{attr_node.namespace.prefix}:#{attr_node.node_name}" else attr_node.node_name end if Loofah::HTML5::WhiteList::ATTR_VAL_IS_URI.include?(attr_name) # this block lifted nearly verbatim from HTML5 sanitization val_unescaped = CGI.unescapeHTML(attr_node.value).gsub(Loofah::HTML5::Scrub::CONTROL_CHARACTERS,'').downcase if val_unescaped =~ /^[a-z0-9][-+.a-z0-9]*:/ && ! Loofah::HTML5::WhiteList::ALLOWED_PROTOCOLS.include?(val_unescaped.split(Loofah::HTML5::WhiteList::PROTOCOL_SEPARATOR)[0]) attr_node.remove end end if Loofah::HTML5::WhiteList::SVG_ATTR_VAL_ALLOWS_REF.include?(attr_name) attr_node.value = attr_node.value.gsub(/url\s*\(\s*[^#\s][^)]+?\)/m, ' ') if attr_node.value end if Loofah::HTML5::WhiteList::SVG_ALLOW_LOCAL_HREF.include?(node.name) && attr_name == 'xlink:href' && attr_node.value =~ /^\s*[^#\s].*/m attr_node.remove end node.remove_attribute(attr_node.name) if attr_name == 'src' && attr_node.value !~ /[^[:space:]]/ Loofah::HTML5::Scrub.force_correct_attribute_escaping! node end end # === Rails::Html::TargetScrubber # # Where Rails::Html::PermitScrubber picks out tags and attributes to permit in # sanitization, Rails::Html::TargetScrubber targets them for removal. # # +tags=+ # If set, elements included will be stripped. # # +attributes=+ # If set, attributes included will be removed. class TargetScrubber < PermitScrubber def allowed_node?(node) !super end def scrub_attribute?(name) !super end end # === Rails::Html::TextOnlyScrubber # # Rails::Html::TextOnlyScrubber allows you to permit text nodes. # # Unallowed elements will be stripped, i.e. element is removed but its subtree kept. class TextOnlyScrubber < Loofah::Scrubber def initialize @direction = :bottom_up end def scrub(node) if node.text? CONTINUE else node.before node.children node.remove end end end end end rails-html-sanitizer-1.0.4/lib/rails/html/sanitizer/0000755000004100000410000000000013255032720022461 5ustar www-datawww-datarails-html-sanitizer-1.0.4/lib/rails/html/sanitizer/version.rb0000644000004100000410000000013113255032720024466 0ustar www-datawww-datamodule Rails module Html class Sanitizer VERSION = "1.0.4" end end end rails-html-sanitizer-1.0.4/lib/rails/html/sanitizer.rb0000644000004100000410000001233713255032720023014 0ustar www-datawww-datamodule Rails module Html XPATHS_TO_REMOVE = %w{.//script .//form comment()} class Sanitizer # :nodoc: def sanitize(html, options = {}) raise NotImplementedError, "subclasses must implement sanitize method." end private def remove_xpaths(node, xpaths) node.xpath(*xpaths).remove node end def properly_encode(fragment, options) fragment.xml? ? fragment.to_xml(options) : fragment.to_html(options) end end # === Rails::Html::FullSanitizer # Removes all tags but strips out scripts, forms and comments. # # full_sanitizer = Rails::Html::FullSanitizer.new # full_sanitizer.sanitize("Bold no more! See more here...") # # => Bold no more! See more here... class FullSanitizer < Sanitizer def sanitize(html, options = {}) return unless html return html if html.empty? loofah_fragment = Loofah.fragment(html) remove_xpaths(loofah_fragment, XPATHS_TO_REMOVE) loofah_fragment.scrub!(TextOnlyScrubber.new) properly_encode(loofah_fragment, encoding: 'UTF-8') end end # === Rails::Html::LinkSanitizer # Removes a tags and href attributes leaving only the link text # # link_sanitizer = Rails::Html::LinkSanitizer.new # link_sanitizer.sanitize('Only the link text will be kept.') # # => Only the link text will be kept. class LinkSanitizer < Sanitizer def initialize @link_scrubber = TargetScrubber.new @link_scrubber.tags = %w(a href) @link_scrubber.attributes = %w(href) end def sanitize(html, options = {}) Loofah.scrub_fragment(html, @link_scrubber).to_s end end # === Rails::Html::WhiteListSanitizer # Sanitizes html and css from an extensive white list (see link further down). # # === Whitespace # We can't make any guarantees about whitespace being kept or stripped. # Loofah uses Nokogiri, which wraps either a C or Java parser for the # respective Ruby implementation. # Those two parsers determine how whitespace is ultimately handled. # # When the stripped markup will be rendered the users browser won't take # whitespace into account anyway. It might be better to suggest your users # wrap their whitespace sensitive content in pre tags or that you do # so automatically. # # === Options # Sanitizes both html and css via the white lists found here: # https://github.com/flavorjones/loofah/blob/master/lib/loofah/html5/whitelist.rb # # WhiteListSanitizer also accepts options to configure # the white list used when sanitizing html. # There's a class level option: # Rails::Html::WhiteListSanitizer.allowed_tags = %w(table tr td) # Rails::Html::WhiteListSanitizer.allowed_attributes = %w(id class style) # # Tags and attributes can also be passed to +sanitize+. # Passed options take precedence over the class level options. # # === Examples # white_list_sanitizer = Rails::Html::WhiteListSanitizer.new # # Sanitize css doesn't take options # white_list_sanitizer.sanitize_css('background-color: #000;') # # Default: sanitize via a extensive white list of allowed elements # white_list_sanitizer.sanitize(@article.body) # # White list via the supplied tags and attributes # white_list_sanitizer.sanitize(@article.body, tags: %w(table tr td), # attributes: %w(id class style)) # # White list via a custom scrubber # white_list_sanitizer.sanitize(@article.body, scrubber: ArticleScrubber.new) class WhiteListSanitizer < Sanitizer class << self attr_accessor :allowed_tags attr_accessor :allowed_attributes end self.allowed_tags = Set.new(%w(strong em b i p code pre tt samp kbd var sub sup dfn cite big small address hr br div span h1 h2 h3 h4 h5 h6 ul ol li dl dt dd abbr acronym a img blockquote del ins)) self.allowed_attributes = Set.new(%w(href src width height alt cite datetime title class name xml:lang abbr)) def initialize @permit_scrubber = PermitScrubber.new end def sanitize(html, options = {}) return unless html return html if html.empty? loofah_fragment = Loofah.fragment(html) if scrubber = options[:scrubber] # No duck typing, Loofah ensures subclass of Loofah::Scrubber loofah_fragment.scrub!(scrubber) elsif allowed_tags(options) || allowed_attributes(options) @permit_scrubber.tags = allowed_tags(options) @permit_scrubber.attributes = allowed_attributes(options) loofah_fragment.scrub!(@permit_scrubber) else remove_xpaths(loofah_fragment, XPATHS_TO_REMOVE) loofah_fragment.scrub!(:strip) end properly_encode(loofah_fragment, encoding: 'UTF-8') end def sanitize_css(style_string) Loofah::HTML5::Scrub.scrub_css(style_string) end private def allowed_tags(options) options[:tags] || self.class.allowed_tags end def allowed_attributes(options) options[:attributes] || self.class.allowed_attributes end end end end rails-html-sanitizer-1.0.4/test/0000755000004100000410000000000013255032720016604 5ustar www-datawww-datarails-html-sanitizer-1.0.4/test/sanitizer_test.rb0000644000004100000410000005116213255032720022205 0ustar www-datawww-datarequire "minitest/autorun" require "rails-html-sanitizer" require "rails/dom/testing/assertions/dom_assertions" class SanitizersTest < Minitest::Test include Rails::Dom::Testing::Assertions::DomAssertions def test_sanitizer_sanitize_raises_not_implemented_error assert_raises NotImplementedError do Rails::Html::Sanitizer.new.sanitize('') end end def test_sanitize_nested_script sanitizer = Rails::Html::WhiteListSanitizer.new assert_equal '<script>alert("XSS");</script>', sanitizer.sanitize('alert("XSS");/', tags: %w(em)) end def test_sanitize_nested_script_in_style sanitizer = Rails::Html::WhiteListSanitizer.new assert_equal '<script>alert("XSS");</script>', sanitizer.sanitize('alert("XSS");/', tags: %w(em)) end class XpathRemovalTestSanitizer < Rails::Html::Sanitizer def sanitize(html, options = {}) fragment = Loofah.fragment(html) remove_xpaths(fragment, options[:xpaths]).to_s end end def test_remove_xpaths_removes_an_xpath html = %(

hello

) assert_equal %(

hello

), xpath_sanitize(html, xpaths: %w(.//script)) end def test_remove_xpaths_removes_all_occurrences_of_xpath html = %(

hello

) assert_equal %(

hello

), xpath_sanitize(html, xpaths: %w(.//script)) end def test_remove_xpaths_called_with_faulty_xpath assert_raises Nokogiri::XML::XPath::SyntaxError do xpath_sanitize('

hello

', xpaths: %w(..faulty_xpath)) end end def test_remove_xpaths_called_with_xpath_string assert_equal '', xpath_sanitize('', xpaths: './/a') end def test_remove_xpaths_called_with_enumerable_xpaths assert_equal '', xpath_sanitize('', xpaths: %w(.//a .//span)) end def test_strip_tags_with_quote input = '<" hi' assert_equal ' hi', full_sanitize(input) end def test_strip_invalid_html assert_equal "<<", full_sanitize("<<This is a test.\n\n\n\n

It no longer contains any HTML.

\n} assert_equal expected, full_sanitize(input) end def test_remove_unclosed_tags assert_equal "This is ", full_sanitize("This is <-- not\n a comment here.") end def test_strip_cdata assert_equal "This has a ]]> here.", full_sanitize("This has a ]]> here.") end def test_strip_unclosed_cdata assert_equal "This has an unclosed ]] here...", full_sanitize("This has an unclosed ]] here...") end def test_strip_blank_string assert_nil full_sanitize(nil) assert_equal "", full_sanitize("") assert_equal " ", full_sanitize(" ") end def test_strip_tags_with_plaintext assert_equal "Dont touch me", full_sanitize("Dont touch me") end def test_strip_tags_with_tags assert_equal "This is a test.", full_sanitize("

This is a test.

") end def test_escape_tags_with_many_open_quotes assert_equal "<<", full_sanitize("<<") end def test_strip_tags_with_sentence assert_equal "This is a test.", full_sanitize("This is a test.") end def test_strip_tags_with_comment assert_equal "This has a here.", full_sanitize("This has a here.") end def test_strip_tags_with_frozen_string assert_equal "Frozen string with no tags", full_sanitize("Frozen string with no tags".freeze) end def test_full_sanitize_respect_html_escaping_of_the_given_string assert_equal 'test\r\nstring', full_sanitize('test\r\nstring') assert_equal '&', full_sanitize('&') assert_equal '&', full_sanitize('&') assert_equal '&amp;', full_sanitize('&amp;') assert_equal 'omg <script>BOM</script>', full_sanitize('omg <script>BOM</script>') end def test_strip_links_with_tags_in_tags expected = "<a href='hello'>all day long</a>" input = "<a href='hello'>all day long</a>" assert_equal expected, link_sanitize(input) end def test_strip_links_with_unclosed_tags assert_equal "", link_sanitize("on my mind\nall day long") end def test_strip_links_leaves_nonlink_tags assert_equal "My mind\nall day long", link_sanitize("My mind\nall day long") end def test_strip_links_with_links assert_equal "0wn3d", link_sanitize("0wn3d") end def test_strip_links_with_linkception assert_equal "Magic", link_sanitize("Magic") end def test_strip_links_with_a_tag_in_href assert_equal "FrrFox", link_sanitize("FrrFox") end def test_sanitize_form assert_sanitized "
", '' end def test_sanitize_plaintext assert_sanitized "<span>foo</span></plaintext>", "<span>foo</span>" end def test_sanitize_script assert_sanitized "a b c<script language=\"Javascript\">blah blah blah</script>d e f", "a b cblah blah blahd e f" end def test_sanitize_js_handlers raw = %{onthis="do that" <a href="#" onclick="hello" name="foo" onbogus="remove me">hello</a>} assert_sanitized raw, %{onthis="do that" <a href="#" name="foo">hello</a>} end def test_sanitize_javascript_href raw = %{href="javascript:bang" <a href="javascript:bang" name="hello">foo</a>, <span href="javascript:bang">bar</span>} assert_sanitized raw, %{href="javascript:bang" <a name="hello">foo</a>, <span>bar</span>} end def test_sanitize_image_src raw = %{src="javascript:bang" <img src="javascript:bang" width="5">foo</img>, <span src="javascript:bang">bar</span>} assert_sanitized raw, %{src="javascript:bang" <img width="5">foo</img>, <span>bar</span>} end tags = Loofah::HTML5::WhiteList::ALLOWED_ELEMENTS - %w(script form) tags.each do |tag_name| define_method "test_should_allow_#{tag_name}_tag" do scope_allowed_tags(tags) do assert_sanitized "start <#{tag_name} title=\"1\" onclick=\"foo\">foo <bad>bar</bad> baz</#{tag_name}> end", %(start <#{tag_name} title="1">foo bar baz</#{tag_name}> end) end end end def test_should_allow_anchors assert_sanitized %(<a href="foo" onclick="bar"><script>baz</script></a>), %(<a href=\"foo\">baz</a>) end def test_video_poster_sanitization scope_allowed_tags(%w(video)) do scope_allowed_attributes %w(src poster) do assert_sanitized %(<video src="videofile.ogg" autoplay poster="posterimage.jpg"></video>), %(<video src="videofile.ogg" poster="posterimage.jpg"></video>) assert_sanitized %(<video src="videofile.ogg" poster=javascript:alert(1)></video>), %(<video src="videofile.ogg"></video>) end end end # RFC 3986, sec 4.2 def test_allow_colons_in_path_component assert_sanitized "<a href=\"./this:that\">foo</a>" end %w(src width height alt).each do |img_attr| define_method "test_should_allow_image_#{img_attr}_attribute" do assert_sanitized %(<img #{img_attr}="foo" onclick="bar" />), %(<img #{img_attr}="foo" />) end end def test_should_handle_non_html assert_sanitized 'abc' end def test_should_handle_blank_text [nil, '', ' '].each { |blank| assert_sanitized blank } end def test_setting_allowed_tags_affects_sanitization scope_allowed_tags %w(u) do |sanitizer| assert_equal '<u></u>', sanitizer.sanitize('<a><u></u></a>') end end def test_setting_allowed_attributes_affects_sanitization scope_allowed_attributes %w(foo) do |sanitizer| input = '<a foo="hello" bar="world"></a>' assert_equal '<a foo="hello"></a>', sanitizer.sanitize(input) end end def test_custom_tags_overrides_allowed_tags scope_allowed_tags %(u) do |sanitizer| input = '<a><u></u></a>' assert_equal '<a></a>', sanitizer.sanitize(input, tags: %w(a)) end end def test_custom_attributes_overrides_allowed_attributes scope_allowed_attributes %(foo) do |sanitizer| input = '<a foo="hello" bar="world"></a>' assert_equal '<a bar="world"></a>', sanitizer.sanitize(input, attributes: %w(bar)) end end def test_should_allow_custom_tags text = "<u>foo</u>" assert_equal text, white_list_sanitize(text, tags: %w(u)) end def test_should_allow_only_custom_tags text = "<u>foo</u> with <i>bar</i>" assert_equal "<u>foo</u> with bar", white_list_sanitize(text, tags: %w(u)) end def test_should_allow_custom_tags_with_attributes text = %(<blockquote cite="http://example.com/">foo</blockquote>) assert_equal text, white_list_sanitize(text) end def test_should_allow_custom_tags_with_custom_attributes text = %(<blockquote foo="bar">Lorem ipsum</blockquote>) assert_equal text, white_list_sanitize(text, attributes: ['foo']) end def test_scrub_style_if_style_attribute_option_is_passed input = '<p style="color: #000; background-image: url(http://www.ragingplatypus.com/i/cam-full.jpg);"></p>' assert_equal '<p style="color: #000;"></p>', white_list_sanitize(input, attributes: %w(style)) end def test_should_raise_argument_error_if_tags_is_not_enumerable assert_raises ArgumentError do white_list_sanitize('<a>some html</a>', tags: 'foo') end end def test_should_raise_argument_error_if_attributes_is_not_enumerable assert_raises ArgumentError do white_list_sanitize('<a>some html</a>', attributes: 'foo') end end def test_should_not_accept_non_loofah_inheriting_scrubber scrubber = Object.new def scrubber.scrub(node); node.name = 'h1'; end assert_raises Loofah::ScrubberNotFound do white_list_sanitize('<a>some html</a>', scrubber: scrubber) end end def test_should_accept_loofah_inheriting_scrubber scrubber = Loofah::Scrubber.new def scrubber.scrub(node); node.name = 'h1'; end html = "<script>hello!</script>" assert_equal "<h1>hello!</h1>", white_list_sanitize(html, scrubber: scrubber) end def test_should_accept_loofah_scrubber_that_wraps_a_block scrubber = Loofah::Scrubber.new { |node| node.name = 'h1' } html = "<script>hello!</script>" assert_equal "<h1>hello!</h1>", white_list_sanitize(html, scrubber: scrubber) end def test_custom_scrubber_takes_precedence_over_other_options scrubber = Loofah::Scrubber.new { |node| node.name = 'h1' } html = "<script>hello!</script>" assert_equal "<h1>hello!</h1>", white_list_sanitize(html, scrubber: scrubber, tags: ['foo']) end [%w(img src), %w(a href)].each do |(tag, attr)| define_method "test_should_strip_#{attr}_attribute_in_#{tag}_with_bad_protocols" do assert_sanitized %(<#{tag} #{attr}="javascript:bang" title="1">boo</#{tag}>), %(<#{tag} title="1">boo</#{tag}>) end end def test_should_block_script_tag assert_sanitized %(<SCRIPT\nSRC=http://ha.ckers.org/xss.js></SCRIPT>), "" end def test_should_not_fall_for_xss_image_hack_with_uppercase_tags assert_sanitized %(<IMG """><SCRIPT>alert("XSS")</SCRIPT>">), %(<img>alert("XSS")"&gt;) end [%(<IMG SRC="javascript:alert('XSS');">), %(<IMG SRC=javascript:alert('XSS')>), %(<IMG SRC=JaVaScRiPt:alert('XSS')>), %(<IMG SRC=javascript:alert(&quot;XSS&quot;)>), %(<IMG SRC=javascript:alert(String.fromCharCode(88,83,83))>), %(<IMG SRC=&#106;&#97;&#118;&#97;&#115;&#99;&#114;&#105;&#112;&#116;&#58;&#97;&#108;&#101;&#114;&#116;&#40;&#39;&#88;&#83;&#83;&#39;&#41;>), %(<IMG SRC=&#0000106&#0000097&#0000118&#0000097&#0000115&#0000099&#0000114&#0000105&#0000112&#0000116&#0000058&#0000097&#0000108&#0000101&#0000114&#0000116&#0000040&#0000039&#0000088&#0000083&#0000083&#0000039&#0000041>), %(<IMG SRC=&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69&#x70&#x74&#x3A&#x61&#x6C&#x65&#x72&#x74&#x28&#x27&#x58&#x53&#x53&#x27&#x29>), %(<IMG SRC="jav\tascript:alert('XSS');">), %(<IMG SRC="jav&#x09;ascript:alert('XSS');">), %(<IMG SRC="jav&#x0A;ascript:alert('XSS');">), %(<IMG SRC="jav&#x0D;ascript:alert('XSS');">), %(<IMG SRC=" &#14; javascript:alert('XSS');">), %(<IMG SRC="javascript&#x3a;alert('XSS');">), %(<IMG SRC=`javascript:alert("RSnake says, 'XSS'")`>)].each do |img_hack| define_method "test_should_not_fall_for_xss_image_hack_#{img_hack}" do assert_sanitized img_hack, "<img>" end end def test_should_sanitize_tag_broken_up_by_null assert_sanitized %(<SCR\0IPT>alert(\"XSS\")</SCR\0IPT>), "" end def test_should_sanitize_invalid_script_tag assert_sanitized %(<SCRIPT/XSS SRC="http://ha.ckers.org/xss.js"></SCRIPT>), "" end def test_should_sanitize_script_tag_with_multiple_open_brackets assert_sanitized %(<<SCRIPT>alert("XSS");//<</SCRIPT>), "&lt;alert(\"XSS\");//&lt;" assert_sanitized %(<iframe src=http://ha.ckers.org/scriptlet.html\n<a), "" end def test_should_sanitize_unclosed_script assert_sanitized %(<SCRIPT SRC=http://ha.ckers.org/xss.js?<B>), "" end def test_should_sanitize_half_open_scripts assert_sanitized %(<IMG SRC="javascript:alert('XSS')"), "<img>" end def test_should_not_fall_for_ridiculous_hack img_hack = %(<IMG\nSRC\n=\n"\nj\na\nv\na\ns\nc\nr\ni\np\nt\n:\na\nl\ne\nr\nt\n(\n'\nX\nS\nS\n'\n)\n"\n>) assert_sanitized img_hack, "<img>" end def test_should_sanitize_attributes assert_sanitized %(<SPAN title="'><script>alert()</script>">blah</SPAN>), %(<span title="#{CGI.escapeHTML "'><script>alert()</script>"}">blah</span>) end def test_should_sanitize_illegal_style_properties raw = %(display:block; position:absolute; left:0; top:0; width:100%; height:100%; z-index:1; background-color:black; background-image:url(http://www.ragingplatypus.com/i/cam-full.jpg); background-x:center; background-y:center; background-repeat:repeat;) expected = %(display:block;width:100%;height:100%;background-color:black;background-x:center;background-y:center;) assert_equal expected, sanitize_css(raw) end def test_should_sanitize_with_trailing_space raw = "display:block; " expected = "display:block;" assert_equal expected, sanitize_css(raw) end def test_should_sanitize_xul_style_attributes raw = %(-moz-binding:url('http://ha.ckers.org/xssmoz.xml#xss')) assert_equal '', sanitize_css(raw) end def test_should_sanitize_invalid_tag_names assert_sanitized(%(a b c<script/XSS src="http://ha.ckers.org/xss.js"></script>d e f), "a b cd e f") end def test_should_sanitize_non_alpha_and_non_digit_characters_in_tags assert_sanitized('<a onclick!#$%&()*~+-_.,:;?@[/|\]^`=alert("XSS")>foo</a>', "<a>foo</a>") end def test_should_sanitize_invalid_tag_names_in_single_tags assert_sanitized('<img/src="http://ha.ckers.org/xss.js"/>', "<img />") end def test_should_sanitize_img_dynsrc_lowsrc assert_sanitized(%(<img lowsrc="javascript:alert('XSS')" />), "<img />") end def test_should_sanitize_div_background_image_unicode_encoded raw = %(background-image:\0075\0072\006C\0028'\006a\0061\0076\0061\0073\0063\0072\0069\0070\0074\003a\0061\006c\0065\0072\0074\0028.1027\0058.1053\0053\0027\0029'\0029) assert_equal '', sanitize_css(raw) end def test_should_sanitize_div_style_expression raw = %(width: expression(alert('XSS'));) assert_equal '', sanitize_css(raw) end def test_should_sanitize_across_newlines raw = %(\nwidth:\nexpression(alert('XSS'));\n) assert_equal '', sanitize_css(raw) end def test_should_sanitize_img_vbscript assert_sanitized %(<img src='vbscript:msgbox("XSS")' />), '<img />' end def test_should_sanitize_cdata_section assert_sanitized "<![CDATA[<span>section</span>]]>", "section]]&gt;" end def test_should_sanitize_unterminated_cdata_section assert_sanitized "<![CDATA[<span>neverending...", "neverending..." end def test_should_not_mangle_urls_with_ampersand assert_sanitized %{<a href=\"http://www.domain.com?var1=1&amp;var2=2\">my link</a>} end def test_should_sanitize_neverending_attribute assert_sanitized "<span class=\"\\", "<span class=\"\\\">" end [ %(<a href="javascript&#x3a;alert('XSS');">), %(<a href="javascript&#x003a;alert('XSS');">), %(<a href="javascript&#x3A;alert('XSS');">), %(<a href="javascript&#x003A;alert('XSS');">) ].each_with_index do |enc_hack, i| define_method "test_x03a_handling_#{i+1}" do assert_sanitized enc_hack, "<a>" end end def test_x03a_legitimate assert_sanitized %(<a href="http&#x3a;//legit">), %(<a href="http://legit">) assert_sanitized %(<a href="http&#x3A;//legit">), %(<a href="http://legit">) end def test_sanitize_ascii_8bit_string white_list_sanitize('<a>hello</a>'.encode('ASCII-8BIT')).tap do |sanitized| assert_equal '<a>hello</a>', sanitized assert_equal Encoding::UTF_8, sanitized.encoding end end def test_sanitize_data_attributes assert_sanitized %(<a href="/blah" data-method="post">foo</a>), %(<a href="/blah">foo</a>) assert_sanitized %(<a data-remote="true" data-type="script" data-method="get" data-cross-domain="true" href="attack.js">Launch the missiles</a>), %(<a href="attack.js">Launch the missiles</a>) end def test_allow_data_attribute_if_requested text = %(<a data-foo="foo">foo</a>) assert_equal %(<a data-foo="foo">foo</a>), white_list_sanitize(text, attributes: ['data-foo']) end def test_uri_escaping_of_href_attr_in_a_tag_in_white_list_sanitizer html = %{<a href='examp<!--" unsafeattr=foo()>-->le.com'>test</a>} text = white_list_sanitize(html) assert_equal %{<a href="examp<!--%22%20unsafeattr=foo()>-->le.com">test</a>}, text end def test_uri_escaping_of_src_attr_in_a_tag_in_white_list_sanitizer html = %{<a src='examp<!--" unsafeattr=foo()>-->le.com'>test</a>} text = white_list_sanitize(html) assert_equal %{<a src="examp<!--%22%20unsafeattr=foo()>-->le.com">test</a>}, text end def test_uri_escaping_of_name_attr_in_a_tag_in_white_list_sanitizer html = %{<a name='examp<!--" unsafeattr=foo()>-->le.com'>test</a>} text = white_list_sanitize(html) assert_equal %{<a name="examp<!--%22%20unsafeattr=foo()>-->le.com">test</a>}, text end def test_uri_escaping_of_name_action_in_a_tag_in_white_list_sanitizer html = %{<a action='examp<!--" unsafeattr=foo()>-->le.com'>test</a>} text = white_list_sanitize(html, attributes: ['action']) assert_equal %{<a action="examp<!--%22%20unsafeattr=foo()>-->le.com">test</a>}, text end protected def xpath_sanitize(input, options = {}) XpathRemovalTestSanitizer.new.sanitize(input, options) end def full_sanitize(input, options = {}) Rails::Html::FullSanitizer.new.sanitize(input, options) end def link_sanitize(input, options = {}) Rails::Html::LinkSanitizer.new.sanitize(input, options) end def white_list_sanitize(input, options = {}) Rails::Html::WhiteListSanitizer.new.sanitize(input, options) end def assert_sanitized(input, expected = nil) if input assert_dom_equal expected || input, white_list_sanitize(input) else assert_nil white_list_sanitize(input) end end def sanitize_css(input) Rails::Html::WhiteListSanitizer.new.sanitize_css(input) end def scope_allowed_tags(tags) old_tags = Rails::Html::WhiteListSanitizer.allowed_tags Rails::Html::WhiteListSanitizer.allowed_tags = tags yield Rails::Html::WhiteListSanitizer.new ensure Rails::Html::WhiteListSanitizer.allowed_tags = old_tags end def scope_allowed_attributes(attributes) old_attributes = Rails::Html::WhiteListSanitizer.allowed_attributes Rails::Html::WhiteListSanitizer.allowed_attributes = attributes yield Rails::Html::WhiteListSanitizer.new ensure Rails::Html::WhiteListSanitizer.allowed_attributes = old_attributes end end ��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������rails-html-sanitizer-1.0.4/test/scrubbers_test.rb���������������������������������������������������0000644�0000041�0000041�00000010633�13255032720�022165� 0����������������������������������������������������������������������������������������������������ustar �www-data������������������������www-data���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������require "minitest/autorun" require "rails-html-sanitizer" class ScrubberTest < Minitest::Test protected def assert_scrubbed(html, expected = html) output = Loofah.scrub_fragment(html, @scrubber).to_s assert_equal expected, output end def to_node(text) Loofah.fragment(text).children.first end def assert_node_skipped(text) assert_scrub_returns(Loofah::Scrubber::CONTINUE, text) end def assert_scrub_stopped(text) assert_scrub_returns(Loofah::Scrubber::STOP, text) end def assert_scrub_returns(return_value, text) node = to_node(text) assert_equal return_value, @scrubber.scrub(node) end end class PermitScrubberTest < ScrubberTest def setup @scrubber = Rails::Html::PermitScrubber.new end def test_responds_to_scrub assert @scrubber.respond_to?(:scrub) end def test_default_scrub_behavior assert_scrubbed '<tag>hello</tag>', 'hello' end def test_default_attributes_removal_behavior assert_scrubbed '<p cooler="hello">hello</p>', '<p>hello</p>' end def test_leaves_supplied_tags @scrubber.tags = %w(a) assert_scrubbed '<a>hello</a>' end def test_leaves_only_supplied_tags html = '<tag>leave me <span>now</span></tag>' @scrubber.tags = %w(tag) assert_scrubbed html, '<tag>leave me now</tag>' end def test_leaves_only_supplied_tags_nested html = '<tag>leave <em>me <span>now</span></em></tag>' @scrubber.tags = %w(tag) assert_scrubbed html, '<tag>leave me now</tag>' end def test_leaves_supplied_attributes @scrubber.attributes = %w(cooler) assert_scrubbed '<a cooler="hello"></a>' end def test_leaves_only_supplied_attributes @scrubber.attributes = %w(cooler) assert_scrubbed '<a cooler="hello" b="c" d="e"></a>', '<a cooler="hello"></a>' end def test_leaves_supplied_tags_and_attributes @scrubber.tags = %w(tag) @scrubber.attributes = %w(cooler) assert_scrubbed '<tag cooler="hello"></tag>' end def test_leaves_only_supplied_tags_and_attributes @scrubber.tags = %w(tag) @scrubber.attributes = %w(cooler) html = '<a></a><tag href=""></tag><tag cooler=""></tag>' assert_scrubbed html, '<tag></tag><tag cooler=""></tag>' end def test_leaves_text assert_scrubbed('some text') end def test_skips_text_nodes assert_node_skipped('some text') end def test_tags_accessor_validation e = assert_raises(ArgumentError) do @scrubber.tags = 'tag' end assert_equal "You should pass :tags as an Enumerable", e.message assert_nil @scrubber.tags, "Tags should be nil when validation fails" end def test_attributes_accessor_validation e = assert_raises(ArgumentError) do @scrubber.attributes = 'cooler' end assert_equal "You should pass :attributes as an Enumerable", e.message assert_nil @scrubber.attributes, "Attributes should be nil when validation fails" end end class TargetScrubberTest < ScrubberTest def setup @scrubber = Rails::Html::TargetScrubber.new end def test_targeting_tags_removes_only_them @scrubber.tags = %w(a h1) html = '<script></script><a></a><h1></h1>' assert_scrubbed html, '<script></script>' end def test_targeting_tags_removes_only_them_nested @scrubber.tags = %w(a) html = '<tag><a><tag><a></a></tag></a></tag>' assert_scrubbed html, '<tag><tag></tag></tag>' end def test_targeting_attributes_removes_only_them @scrubber.attributes = %w(class id) html = '<a class="a" id="b" onclick="c"></a>' assert_scrubbed html, '<a onclick="c"></a>' end def test_targeting_tags_and_attributes_removes_only_them @scrubber.tags = %w(tag) @scrubber.attributes = %w(remove) html = '<tag remove="" other=""></tag><a remove="" other=""></a>' assert_scrubbed html, '<a other=""></a>' end end class TextOnlyScrubberTest < ScrubberTest def setup @scrubber = Rails::Html::TextOnlyScrubber.new end def test_removes_all_tags_and_keep_the_content assert_scrubbed '<tag>hello</tag>', 'hello' end def test_skips_text_nodes assert_node_skipped('some text') end end class ReturningStopFromScrubNodeTest < ScrubberTest class ScrubStopper < Rails::Html::PermitScrubber def scrub_node(node) Loofah::Scrubber::STOP end end def setup @scrubber = ScrubStopper.new end def test_returns_stop_from_scrub_if_scrub_node_does assert_scrub_stopped '<script>remove me</script>' end end �����������������������������������������������������������������������������������������������������rails-html-sanitizer-1.0.4/CHANGELOG.md�������������������������������������������������������������0000644�0000041�0000041�00000000126�13255032720�017435� 0����������������������������������������������������������������������������������������������������ustar �www-data������������������������www-data���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������## 1.0.1 * Added support for Rails 4.2.0.beta2 and above ## 1.0.0 * First release. ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������rails-html-sanitizer-1.0.4/README.md����������������������������������������������������������������0000644�0000041�0000041�00000007467�13255032720�017122� 0����������������������������������������������������������������������������������������������������ustar �www-data������������������������www-data���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������# Rails Html Sanitizers In Rails 4.2 and above this gem will be responsible for sanitizing HTML fragments in Rails applications, i.e. in the `sanitize`, `sanitize_css`, `strip_tags` and `strip_links` methods. Rails Html Sanitizer is only intended to be used with Rails applications. If you need similar functionality in non Rails apps consider using [Loofah](https://github.com/flavorjones/loofah) directly (that's what handles sanitization under the hood). ## Installation Add this line to your application's Gemfile: gem 'rails-html-sanitizer' And then execute: $ bundle Or install it yourself as: $ gem install rails-html-sanitizer ## Usage ### Sanitizers All sanitizers respond to `sanitize`. #### FullSanitizer ```ruby full_sanitizer = Rails::Html::FullSanitizer.new full_sanitizer.sanitize("<b>Bold</b> no more! <a href='more.html'>See more here</a>...") # => Bold no more! See more here... ``` #### LinkSanitizer ```ruby link_sanitizer = Rails::Html::LinkSanitizer.new link_sanitizer.sanitize('<a href="example.com">Only the link text will be kept.</a>') # => Only the link text will be kept. ``` #### WhiteListSanitizer ```ruby white_list_sanitizer = Rails::Html::WhiteListSanitizer.new # sanitize via an extensive white list of allowed elements white_list_sanitizer.sanitize(@article.body) # white list only the supplied tags and attributes white_list_sanitizer.sanitize(@article.body, tags: %w(table tr td), attributes: %w(id class style)) # white list via a custom scrubber white_list_sanitizer.sanitize(@article.body, scrubber: ArticleScrubber.new) # white list sanitizer can also sanitize css white_list_sanitizer.sanitize_css('background-color: #000;') ``` ### Scrubbers Scrubbers are objects responsible for removing nodes or attributes you don't want in your HTML document. This gem includes two scrubbers `Rails::Html::PermitScrubber` and `Rails::Html::TargetScrubber`. #### `Rails::Html::PermitScrubber` This scrubber allows you to permit only the tags and attributes you want. ```ruby scrubber = Rails::Html::PermitScrubber.new scrubber.tags = ['a'] html_fragment = Loofah.fragment('<a><img/ ></a>') html_fragment.scrub!(scrubber) html_fragment.to_s # => "<a></a>" ``` #### `Rails::Html::TargetScrubber` Where `PermitScrubber` picks out tags and attributes to permit in sanitization, `Rails::Html::TargetScrubber` targets them for removal. ```ruby scrubber = Rails::Html::TargetScrubber.new scrubber.tags = ['img'] html_fragment = Loofah.fragment('<a><img/ ></a>') html_fragment.scrub!(scrubber) html_fragment.to_s # => "<a></a>" ``` #### Custom Scrubbers You can also create custom scrubbers in your application if you want to. ```ruby class CommentScrubber < Rails::Html::PermitScrubber def initialize super self.tags = %w( form script comment blockquote ) self.attributes = %w( style ) end def skip_node?(node) node.text? end end ``` See `Rails::Html::PermitScrubber` documentation to learn more about which methods can be overridden. #### Custom Scrubber in a Rails app Using the `CommentScrubber` from above, you can use this in a Rails view like so: ```ruby <%= sanitize @comment, scrubber: CommentScrubber.new %> ``` ## Read more Loofah is what underlies the sanitizers and scrubbers of rails-html-sanitizer. - [Loofah and Loofah Scrubbers](https://github.com/flavorjones/loofah) The `node` argument passed to some methods in a custom scrubber is an instance of `Nokogiri::XML::Node`. - [`Nokogiri::XML::Node`](http://nokogiri.org/Nokogiri/XML/Node.html) - [Nokogiri](http://nokogiri.org) ## Contributing to Rails Html Sanitizers Rails Html Sanitizers is work of many contributors. You're encouraged to submit pull requests, propose features and discuss issues. See [CONTRIBUTING](CONTRIBUTING.md). ## License Rails Html Sanitizers is released under the [MIT License](MIT-LICENSE). �����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������