rails-html-sanitizer-1.0.3/ 0000755 0000041 0000041 00000000000 12651763325 015637 5 ustar www-data www-data rails-html-sanitizer-1.0.3/lib/ 0000755 0000041 0000041 00000000000 12651763325 016405 5 ustar www-data www-data rails-html-sanitizer-1.0.3/lib/rails-html-sanitizer.rb 0000644 0000041 0000041 00000004115 12651763325 023015 0 ustar www-data www-data require "rails/html/sanitizer/version" require "loofah" require "rails/html/scrubbers" require "rails/html/sanitizer" module Rails module Html class Sanitizer class << self def full_sanitizer Html::FullSanitizer end def link_sanitizer Html::LinkSanitizer end def white_list_sanitizer Html::WhiteListSanitizer end end end end end module ActionView module Helpers module SanitizeHelper module ClassMethods # Replaces the allowed tags for the +sanitize+ helper. # # class Application < Rails::Application # config.action_view.sanitized_allowed_tags = 'table', 'tr', 'td' # end # def sanitized_allowed_tags=(tags) sanitizer_vendor.white_list_sanitizer.allowed_tags = tags end # Replaces the allowed HTML attributes for the +sanitize+ helper. # # class Application < Rails::Application # config.action_view.sanitized_allowed_attributes = ['onclick', 'longdesc'] # end # def sanitized_allowed_attributes=(attributes) sanitizer_vendor.white_list_sanitizer.allowed_attributes = attributes end [:protocol_separator, :uri_attributes, :bad_tags, :allowed_css_properties, :allowed_css_keywords, :shorthand_css_properties, :allowed_protocols].each do |meth| meth_name = "sanitized_#{meth}" define_method(meth_name) { deprecate_option(meth_name) } define_method("#{meth_name}=") { |_| deprecate_option("#{meth_name}=") } end private def deprecate_option(name) ActiveSupport::Deprecation.warn "The #{name} option is deprecated " \ "and has no effect. Until Rails 5 the old behavior can still be " \ "installed. To do this add the `rails-deprecated-sanitizer` to " \ "your Gemfile. Consult the Rails 4.2 upgrade guide for more information." 
end end end end end rails-html-sanitizer-1.0.3/lib/rails/ 0000755 0000041 0000041 00000000000 12651763325 017517 5 ustar www-data www-data rails-html-sanitizer-1.0.3/lib/rails/html/ 0000755 0000041 0000041 00000000000 12651763325 020463 5 ustar www-data www-data rails-html-sanitizer-1.0.3/lib/rails/html/scrubbers.rb 0000644 0000041 0000041 00000013664 12651763325 023014 0 ustar www-data www-data module Rails module Html # === Rails::Html::PermitScrubber # # Rails::Html::PermitScrubber allows you to permit only your own tags and/or attributes. # # Rails::Html::PermitScrubber can be subclassed to determine: # - When a node should be skipped via +skip_node?+. # - When a node is allowed via +allowed_node?+. # - When an attribute should be scrubbed via +scrub_attribute?+. # # Subclasses don't need to worry if tags or attributes are set or not. # If tags or attributes are not set, Loofah's behavior will be used. # If you override +allowed_node?+ and no tags are set, it will not be called. # Instead Loofahs behavior will be used. # Likewise for +scrub_attribute?+ and attributes respectively. # # Text and CDATA nodes are skipped by default. # Unallowed elements will be stripped, i.e. element is removed but its subtree kept. # Supplied tags and attributes should be Enumerables. # # +tags=+ # If set, elements excluded will be stripped. # If not, elements are stripped based on Loofahs +HTML5::Scrub.allowed_element?+. # # +attributes=+ # If set, attributes excluded will be removed. # If not, attributes are removed based on Loofahs +HTML5::Scrub.scrub_attributes+. # # class CommentScrubber < Html::PermitScrubber # def allowed_node?(node) # !%w(form script comment blockquote).include?(node.name) # end # # def skip_node?(node) # node.text? 
# end # # def scrub_attribute?(name) # name == "style" # end # end # # See the documentation for Nokogiri::XML::Node to understand what's possible # with nodes: http://nokogiri.org/Nokogiri/XML/Node.html class PermitScrubber < Loofah::Scrubber attr_reader :tags, :attributes def initialize @direction = :bottom_up @tags, @attributes = nil, nil end def tags=(tags) @tags = validate!(tags, :tags) end def attributes=(attributes) @attributes = validate!(attributes, :attributes) end def scrub(node) if node.cdata? text = node.document.create_text_node node.text node.replace text return CONTINUE end return CONTINUE if skip_node?(node) unless keep_node?(node) return STOP if scrub_node(node) == STOP end scrub_attributes(node) end protected def allowed_node?(node) @tags.include?(node.name) end def skip_node?(node) node.text? end def scrub_attribute?(name) !@attributes.include?(name) end def keep_node?(node) if @tags allowed_node?(node) else Loofah::HTML5::Scrub.allowed_element?(node.name) end end def scrub_node(node) node.before(node.children) # strip node.remove end def scrub_attributes(node) if @attributes node.attribute_nodes.each do |attr| attr.remove if scrub_attribute?(attr.name) scrub_attribute(node, attr) end scrub_css_attribute(node) else Loofah::HTML5::Scrub.scrub_attributes(node) end end def scrub_css_attribute(node) if Loofah::HTML5::Scrub.respond_to?(:scrub_css_attribute) Loofah::HTML5::Scrub.scrub_css_attribute(node) else style = node.attributes['style'] style.value = Loofah::HTML5::Scrub.scrub_css(style.value) if style end end def validate!(var, name) if var && !var.is_a?(Enumerable) raise ArgumentError, "You should pass :#{name} as an Enumerable" end var end def scrub_attribute(node, attr_node) attr_name = if attr_node.namespace "#{attr_node.namespace.prefix}:#{attr_node.node_name}" else attr_node.node_name end if Loofah::HTML5::WhiteList::ATTR_VAL_IS_URI.include?(attr_name) # this block lifted nearly verbatim from HTML5 sanitization val_unescaped = 
CGI.unescapeHTML(attr_node.value).gsub(Loofah::HTML5::Scrub::CONTROL_CHARACTERS,'').downcase if val_unescaped =~ /^[a-z0-9][-+.a-z0-9]*:/ && ! Loofah::HTML5::WhiteList::ALLOWED_PROTOCOLS.include?(val_unescaped.split(Loofah::HTML5::WhiteList::PROTOCOL_SEPARATOR)[0]) attr_node.remove end end if Loofah::HTML5::WhiteList::SVG_ATTR_VAL_ALLOWS_REF.include?(attr_name) attr_node.value = attr_node.value.gsub(/url\s*\(\s*[^#\s][^)]+?\)/m, ' ') if attr_node.value end if Loofah::HTML5::WhiteList::SVG_ALLOW_LOCAL_HREF.include?(node.name) && attr_name == 'xlink:href' && attr_node.value =~ /^\s*[^#\s].*/m attr_node.remove end node.remove_attribute(attr_node.name) if attr_name == 'src' && attr_node.value !~ /[^[:space:]]/ end end # === Rails::Html::TargetScrubber # # Where Rails::Html::PermitScrubber picks out tags and attributes to permit in # sanitization, Rails::Html::TargetScrubber targets them for removal. # # +tags=+ # If set, elements included will be stripped. # # +attributes=+ # If set, attributes included will be removed. class TargetScrubber < PermitScrubber def allowed_node?(node) !super end def scrub_attribute?(name) !super end end # === Rails::Html::TextOnlyScrubber # # Rails::Html::TextOnlyScrubber allows you to permit text nodes. # # Unallowed elements will be stripped, i.e. element is removed but its subtree kept. class TextOnlyScrubber < Loofah::Scrubber def initialize @direction = :bottom_up end def scrub(node) if node.text? 
CONTINUE else node.before node.children node.remove end end end end end rails-html-sanitizer-1.0.3/lib/rails/html/sanitizer/ 0000755 0000041 0000041 00000000000 12651763325 022473 5 ustar www-data www-data rails-html-sanitizer-1.0.3/lib/rails/html/sanitizer/version.rb 0000644 0000041 0000041 00000000131 12651763325 024500 0 ustar www-data www-data module Rails module Html class Sanitizer VERSION = "1.0.3" end end end rails-html-sanitizer-1.0.3/lib/rails/html/sanitizer.rb 0000644 0000041 0000041 00000012337 12651763325 023026 0 ustar www-data www-data module Rails module Html XPATHS_TO_REMOVE = %w{.//script .//form comment()} class Sanitizer # :nodoc: def sanitize(html, options = {}) raise NotImplementedError, "subclasses must implement sanitize method." end private def remove_xpaths(node, xpaths) node.xpath(*xpaths).remove node end def properly_encode(fragment, options) fragment.xml? ? fragment.to_xml(options) : fragment.to_html(options) end end # === Rails::Html::FullSanitizer # Removes all tags but strips out scripts, forms and comments. # # full_sanitizer = Rails::Html::FullSanitizer.new # full_sanitizer.sanitize("Bold no more! See more here...") # # => Bold no more! See more here... class FullSanitizer < Sanitizer def sanitize(html, options = {}) return unless html return html if html.empty? loofah_fragment = Loofah.fragment(html) remove_xpaths(loofah_fragment, XPATHS_TO_REMOVE) loofah_fragment.scrub!(TextOnlyScrubber.new) properly_encode(loofah_fragment, encoding: 'UTF-8') end end # === Rails::Html::LinkSanitizer # Removes a tags and href attributes leaving only the link text # # link_sanitizer = Rails::Html::LinkSanitizer.new # link_sanitizer.sanitize('Only the link text will be kept.') # # => Only the link text will be kept. 
class LinkSanitizer < Sanitizer
  # Prepares the scrubber shared by all +sanitize+ calls on this instance:
  # a TargetScrubber aimed at the "a" and "href" tag names and at the
  # "href" attribute.
  def initialize
    scrubber = TargetScrubber.new
    scrubber.tags = %w(a href)
    scrubber.attributes = %w(href)
    @link_scrubber = scrubber
  end

  # Runs the link scrubber over +html+ parsed as a fragment and returns the
  # result as a String. +options+ is accepted but not used.
  def sanitize(html, options = {})
    scrubbed = Loofah.scrub_fragment(html, @link_scrubber)
    scrubbed.to_s
  end
end

# === Rails::Html::WhiteListSanitizer
# Sanitizes html and css from an extensive white list (see link further down).
#
# === Whitespace
# We can't make any guarantees about whitespace being kept or stripped.
# Loofah uses Nokogiri, which wraps either a C or Java parser for the
# respective Ruby implementation.
# Those two parsers determine how whitespace is ultimately handled.
#
# When the stripped markup will be rendered the users browser won't take
# whitespace into account anyway. It might be better to suggest your users
# wrap their whitespace sensitive content in pre tags or that you do
# so automatically.
#
# === Options
# Sanitizes both html and css via the white lists found here:
# https://github.com/flavorjones/loofah/blob/master/lib/loofah/html5/whitelist.rb
#
# WhiteListSanitizer also accepts options to configure
# the white list used when sanitizing html.
# There's a class level option:
# Rails::Html::WhiteListSanitizer.allowed_tags = %w(table tr td)
# Rails::Html::WhiteListSanitizer.allowed_attributes = %w(id class style)
#
# Tags and attributes can also be passed to +sanitize+.
# Passed options take precedence over the class level options.
# # === Examples # white_list_sanitizer = Rails::Html::WhiteListSanitizer.new # # Sanitize css doesn't take options # white_list_sanitizer.sanitize_css('background-color: #000;') # # Default: sanitize via a extensive white list of allowed elements # white_list_sanitizer.sanitize(@article.body) # # White list via the supplied tags and attributes # white_list_sanitizer.sanitize(@article.body, tags: %w(table tr td), # attributes: %w(id class style)) # # White list via a custom scrubber # white_list_sanitizer.sanitize(@article.body, scrubber: ArticleScrubber.new) class WhiteListSanitizer < Sanitizer class << self attr_accessor :allowed_tags attr_accessor :allowed_attributes end self.allowed_tags = Set.new(%w(strong em b i p code pre tt samp kbd var sub sup dfn cite big small address hr br div span h1 h2 h3 h4 h5 h6 ul ol li dl dt dd abbr acronym a img blockquote del ins)) self.allowed_attributes = Set.new(%w(href src width height alt cite datetime title class name xml:lang abbr)) def initialize @permit_scrubber = PermitScrubber.new end def sanitize(html, options = {}) return unless html return html if html.empty? 
loofah_fragment = Loofah.fragment(html) if scrubber = options[:scrubber] # No duck typing, Loofah ensures subclass of Loofah::Scrubber loofah_fragment.scrub!(scrubber) elsif allowed_tags(options) || allowed_attributes(options) @permit_scrubber.tags = allowed_tags(options) @permit_scrubber.attributes = allowed_attributes(options) loofah_fragment.scrub!(@permit_scrubber) else remove_xpaths(loofah_fragment, XPATHS_TO_REMOVE) loofah_fragment.scrub!(:strip) end properly_encode(loofah_fragment, encoding: 'UTF-8') end def sanitize_css(style_string) Loofah::HTML5::Scrub.scrub_css(style_string) end private def allowed_tags(options) options[:tags] || self.class.allowed_tags end def allowed_attributes(options) options[:attributes] || self.class.allowed_attributes end end end end rails-html-sanitizer-1.0.3/metadata.yml 0000644 0000041 0000041 00000006051 12651763325 020144 0 ustar www-data www-data --- !ruby/object:Gem::Specification name: rails-html-sanitizer version: !ruby/object:Gem::Version version: 1.0.3 platform: ruby authors: - Rafael Mendonça França - Kasper Timm Hansen autorequire: bindir: bin cert_chain: [] date: 2016-01-25 00:00:00.000000000 Z dependencies: - !ruby/object:Gem::Dependency name: loofah requirement: !ruby/object:Gem::Requirement requirements: - - "~>" - !ruby/object:Gem::Version version: '2.0' type: :runtime prerelease: false version_requirements: !ruby/object:Gem::Requirement requirements: - - "~>" - !ruby/object:Gem::Version version: '2.0' - !ruby/object:Gem::Dependency name: bundler requirement: !ruby/object:Gem::Requirement requirements: - - "~>" - !ruby/object:Gem::Version version: '1.3' type: :development prerelease: false version_requirements: !ruby/object:Gem::Requirement requirements: - - "~>" - !ruby/object:Gem::Version version: '1.3' - !ruby/object:Gem::Dependency name: rake requirement: !ruby/object:Gem::Requirement requirements: - - ">=" - !ruby/object:Gem::Version version: '0' type: :development prerelease: false version_requirements: 
!ruby/object:Gem::Requirement requirements: - - ">=" - !ruby/object:Gem::Version version: '0' - !ruby/object:Gem::Dependency name: minitest requirement: !ruby/object:Gem::Requirement requirements: - - ">=" - !ruby/object:Gem::Version version: '0' type: :development prerelease: false version_requirements: !ruby/object:Gem::Requirement requirements: - - ">=" - !ruby/object:Gem::Version version: '0' - !ruby/object:Gem::Dependency name: rails-dom-testing requirement: !ruby/object:Gem::Requirement requirements: - - ">=" - !ruby/object:Gem::Version version: '0' type: :development prerelease: false version_requirements: !ruby/object:Gem::Requirement requirements: - - ">=" - !ruby/object:Gem::Version version: '0' description: HTML sanitization for Rails applications email: - rafaelmfranca@gmail.com - kaspth@gmail.com executables: [] extensions: [] extra_rdoc_files: [] files: - CHANGELOG.md - README.md - lib/rails-html-sanitizer.rb - lib/rails/html/sanitizer.rb - lib/rails/html/sanitizer/version.rb - lib/rails/html/scrubbers.rb - test/sanitizer_test.rb - test/scrubbers_test.rb homepage: https://github.com/rails/rails-html-sanitizer licenses: - MIT metadata: {} post_install_message: rdoc_options: [] require_paths: - lib required_ruby_version: !ruby/object:Gem::Requirement requirements: - - ">=" - !ruby/object:Gem::Version version: '0' required_rubygems_version: !ruby/object:Gem::Requirement requirements: - - ">=" - !ruby/object:Gem::Version version: '0' requirements: [] rubyforge_project: rubygems_version: 2.5.1 signing_key: specification_version: 4 summary: This gem is responsible to sanitize HTML fragments in Rails applications. 
test_files: - test/sanitizer_test.rb - test/scrubbers_test.rb rails-html-sanitizer-1.0.3/test/ 0000755 0000041 0000041 00000000000 12651763325 016616 5 ustar www-data www-data rails-html-sanitizer-1.0.3/test/sanitizer_test.rb 0000644 0000041 0000041 00000046710 12651763325 022222 0 ustar www-data www-data require "minitest/autorun" require "rails-html-sanitizer" require "rails/dom/testing/assertions/dom_assertions" class SanitizersTest < Minitest::Test include Rails::Dom::Testing::Assertions::DomAssertions def test_sanitizer_sanitize_raises_not_implemented_error assert_raises NotImplementedError do Rails::Html::Sanitizer.new.sanitize('') end end def test_sanitize_nested_script sanitizer = Rails::Html::WhiteListSanitizer.new assert_equal '<script>alert("XSS");</script>', sanitizer.sanitize('alert("XSS");/', tags: %w(em)) end def test_sanitize_nested_script_in_style sanitizer = Rails::Html::WhiteListSanitizer.new assert_equal '<script>alert("XSS");</script>', sanitizer.sanitize('alert("XSS");/', tags: %w(em)) end class XpathRemovalTestSanitizer < Rails::Html::Sanitizer def sanitize(html, options = {}) fragment = Loofah.fragment(html) remove_xpaths(fragment, options[:xpaths]).to_s end end def test_remove_xpaths_removes_an_xpath html = %(
hello
hello
It no longer contains any HTML.
This is a test.
") end def test_strip_tags_with_many_open_quotes assert_equal "", full_sanitize("<<foo) assert_equal text, white_list_sanitize(text) end def test_should_allow_custom_tags_with_custom_attributes text = %(
Lorem ipsum) assert_equal text, white_list_sanitize(text, attributes: ['foo']) end def test_scrub_style_if_style_attribute_option_is_passed input = '' assert_equal '', white_list_sanitize(input, attributes: %w(style)) end def test_should_raise_argument_error_if_tags_is_not_enumerable assert_raises ArgumentError do white_list_sanitize('some html', tags: 'foo') end end def test_should_raise_argument_error_if_attributes_is_not_enumerable assert_raises ArgumentError do white_list_sanitize('some html', attributes: 'foo') end end def test_should_not_accept_non_loofah_inheriting_scrubber scrubber = Object.new def scrubber.scrub(node); node.name = 'h1'; end assert_raises Loofah::ScrubberNotFound do white_list_sanitize('some html', scrubber: scrubber) end end def test_should_accept_loofah_inheriting_scrubber scrubber = Loofah::Scrubber.new def scrubber.scrub(node); node.name = 'h1'; end html = "" assert_equal "