sanitize-4.6.6/0000755000004100000410000000000013337503361013411 5ustar www-datawww-datasanitize-4.6.6/test/0000755000004100000410000000000013337503361014370 5ustar www-datawww-datasanitize-4.6.6/test/test_malicious_css.rb0000644000004100000410000000235513337503361020616 0ustar www-datawww-data# encoding: utf-8 require_relative 'common' # Miscellaneous attempts to sneak maliciously crafted CSS past Sanitize. Some of # these are courtesy of (or inspired by) the OWASP XSS Filter Evasion Cheat # Sheet. # # https://www.owasp.org/index.php/XSS_Filter_Evasion_Cheat_Sheet describe 'Malicious CSS' do make_my_diffs_pretty! parallelize_me! before do @s = Sanitize::CSS.new(Sanitize::Config::RELAXED) end it 'should not be possible to inject an expression by munging it with a comment' do @s.properties(%[width:expr/*XSS*/ession(alert('XSS'))]). must_equal '' @s.properties(%[width:ex/*XSS*//*/*/pression(alert("XSS"))]). must_equal '' end it 'should not be possible to inject an expression by munging it with a newline' do @s.properties(%[width:\nexpression(alert('XSS'));]). must_equal '' end it 'should not allow the javascript protocol' do @s.properties(%[background-image:url("javascript:alert('XSS')");]). must_equal '' Sanitize.fragment(%[
], Sanitize::Config::RELAXED).must_equal '
' end it 'should not allow behaviors' do @s.properties(%[behavior: url(xss.htc);]).must_equal '' end end sanitize-4.6.6/test/common.rb0000644000004100000410000000126113337503361016205 0ustar www-datawww-data# encoding: utf-8 gem 'minitest' require 'minitest/autorun' require_relative '../lib/sanitize' # Helper to stub an instance method. Shamelessly stolen from # https://github.com/codeodor/minitest-stub_any_instance/ class Object def self.stub_instance(name, value, &block) old_method = "__stubbed_method_#{name}__" class_eval do alias_method old_method, name define_method(name) do |*args| if value.respond_to?(:call) then value.call(*args) else value end end end yield ensure class_eval do undef_method name alias_method name, old_method undef_method old_method end end end sanitize-4.6.6/test/test_clean_comment.rb0000644000004100000410000000425513337503361020566 0ustar www-datawww-data# encoding: utf-8 require_relative 'common' describe 'Sanitize::Transformers::CleanComment' do make_my_diffs_pretty! parallelize_me! describe 'when :allow_comments is false' do before do @s = Sanitize.new(:allow_comments => false, :elements => ['div']) end it 'should remove comments' do @s.fragment('foo bar').must_equal 'foo bar' @s.fragment('foo bar").must_equal 'foo bar' @s.fragment("foo --> -->bar").must_equal 'foo --> -->bar' @s.fragment("foo
>bar
").must_equal 'foo
>bar
' # Special case: the comment markup is inside a ").must_equal '<!-- comment -->' Sanitize.fragment("", :allow_comments => false, :elements => ['script']) .must_equal '' end end describe 'when :allow_comments is true' do before do @s = Sanitize.new(:allow_comments => true, :elements => ['div']) end it 'should allow comments' do @s.fragment('foo bar').must_equal 'foo bar' @s.fragment('foo ' @s.fragment('foo ' @s.fragment("foo bar").must_equal "foo bar" @s.fragment("foo --> -->bar").must_equal 'foo --> -->bar' @s.fragment("foo
>bar
").must_equal 'foo
>bar
' # Special case: the comment markup is inside a ").must_equal '<!-- comment -->' Sanitize.fragment("", :allow_comments => true, :elements => ['script']) .must_equal '' end end end sanitize-4.6.6/test/test_clean_css.rb0000644000004100000410000000322713337503361017712 0ustar www-datawww-data# encoding: utf-8 require_relative 'common' describe 'Sanitize::Transformers::CSS::CleanAttribute' do make_my_diffs_pretty! parallelize_me! before do @s = Sanitize.new(Sanitize::Config::RELAXED) end it 'should sanitize CSS properties in style attributes' do @s.fragment(%[
].strip).must_equal %[
].strip end it 'should remove the style attribute if the sanitized CSS is empty' do @s.fragment('
'). must_equal '
' end end describe 'Sanitize::Transformers::CSS::CleanElement' do make_my_diffs_pretty! parallelize_me! before do @s = Sanitize.new(Sanitize::Config::RELAXED) end it 'should sanitize CSS stylesheets in ].strip @s.fragment(html).must_equal %[ ].strip end it 'should remove the ').must_equal '' end end sanitize-4.6.6/test/test_config.rb0000644000004100000410000000361413337503361017225 0ustar www-datawww-data# encoding: utf-8 require_relative 'common' describe 'Config' do make_my_diffs_pretty! parallelize_me! def verify_deeply_frozen(config) config.must_be :frozen? if Hash === config config.each_value {|v| verify_deeply_frozen(v) } elsif Set === config || Array === config config.each {|v| verify_deeply_frozen(v) } end end it 'built-in configs should be deeply frozen' do verify_deeply_frozen Sanitize::Config::DEFAULT verify_deeply_frozen Sanitize::Config::BASIC verify_deeply_frozen Sanitize::Config::RELAXED verify_deeply_frozen Sanitize::Config::RESTRICTED end describe '.freeze_config' do it 'should deeply freeze and return a configuration Hash' do a = {:one => {:one_one => [0, '1', :a], :one_two => false, :one_three => Set.new([:a, :b, :c])}} b = Sanitize::Config.freeze_config(a) b.must_be_same_as a verify_deeply_frozen a end end describe '.merge' do it 'should deeply merge a configuration Hash' do # Freeze to ensure that we get an error if either Hash is modified. 
a = Sanitize::Config.freeze_config({:one => {:one_one => [0, '1', :a], :one_two => false, :one_three => Set.new([:a, :b, :c])}}) b = Sanitize::Config.freeze_config({:one => {:one_two => true, :one_three => 3}, :two => 2}) c = Sanitize::Config.merge(a, b) c.wont_be_same_as a c.wont_be_same_as b c.must_equal( :one => { :one_one => [0, '1', :a], :one_two => true, :one_three => 3 }, :two => 2 ) c[:one].wont_be_same_as a[:one] c[:one][:one_one].wont_be_same_as a[:one][:one_one] end it 'should raise an ArgumentError if either argument is not a Hash' do proc { Sanitize::Config.merge('foo', {}) }.must_raise ArgumentError proc { Sanitize::Config.merge({}, 'foo') }.must_raise ArgumentError end end end sanitize-4.6.6/test/test_malicious_html.rb0000644000004100000410000001450213337503361020767 0ustar www-datawww-data# encoding: utf-8 require_relative 'common' # Miscellaneous attempts to sneak maliciously crafted HTML past Sanitize. Many # of these are courtesy of (or inspired by) the OWASP XSS Filter Evasion Cheat # Sheet. # # https://www.owasp.org/index.php/XSS_Filter_Evasion_Cheat_Sheet describe 'Malicious HTML' do make_my_diffs_pretty! parallelize_me! before do @s = Sanitize.new(Sanitize::Config::RELAXED) end describe 'comments' do it 'should not allow script injection via conditional comments' do @s.fragment(%[]). must_equal '' end end describe 'interpolation (ERB, PHP, etc.)' do it 'should escape ERB-style tags' do @s.fragment('<% naughty_ruby_code %>'). must_equal '<% naughty_ruby_code %>' @s.fragment('<%= naughty_ruby_code %>'). must_equal '<%= naughty_ruby_code %>' end it 'should remove PHP-style tags' do @s.fragment(''). must_equal '' @s.fragment(''). must_equal '' end end describe '' do it 'should not be possible to inject JS via a malformed event attribute' do @s.document(''). 
must_equal "\n" end end describe '' Sanitize.fragment(input, :transformers => youtube_transformer) .must_equal '' end it 'should allow HTTPS YouTube video embeds' do input = '' Sanitize.fragment(input, :transformers => youtube_transformer) .must_equal '' end it 'should allow protocol-relative YouTube video embeds' do input = '' Sanitize.fragment(input, :transformers => youtube_transformer) .must_equal '' end it 'should allow privacy-enhanced YouTube video embeds' do input = '' Sanitize.fragment(input, :transformers => youtube_transformer) .must_equal '' end it 'should not allow non-YouTube video embeds' do input = '' Sanitize.fragment(input, :transformers => youtube_transformer) .must_equal('') end end describe 'DOM modification transformer' do b_to_strong_tag_transformer = lambda do |env| node = env[:node] node_name = env[:node_name] if node_name == 'b' node.name = 'strong' end end it 'should allow the tag to be changed to a tag' do input = 'text' Sanitize.fragment(input, :elements => ['strong'], :transformers => b_to_strong_tag_transformer) .must_equal 'text' end end end sanitize-4.6.6/test/test_clean_element.rb0000644000004100000410000003702113337503361020552 0ustar www-datawww-data# encoding: utf-8 require_relative 'common' describe 'Sanitize::Transformers::CleanElement' do make_my_diffs_pretty! parallelize_me! strings = { :basic => { :html => 'Lorem ipsum dolor sit
amet ', :default => 'Lorem ipsum dolor sit amet .foo { color: #fff; } alert("hello world");', :restricted => 'Lorem ipsum dolor sit amet .foo { color: #fff; } alert("hello world");', :basic => 'Lorem ipsum dolor sit
amet .foo { color: #fff; } alert("hello world");', :relaxed => 'Lorem ipsum dolor sit
amet alert("hello world");' }, :malformed => { :html => 'Lorem
dolor sit
amet ', :default => 'Lorem ipsum dolor sit amet <script>alert("hello world");', :restricted => 'Lorem ipsum dolor sit amet <script>alert("hello world");', :basic => 'Lorem ipsum dolor sit
amet <script>alert("hello world");', :relaxed => 'Lorem ipsum dolor sit
amet <script>alert("hello world");' } } protocols = { 'protocol-based JS injection: simple, no spaces' => { :html => 'foo', :default => 'foo', :restricted => 'foo', :basic => 'foo', :relaxed => 'foo' }, 'protocol-based JS injection: simple, spaces before' => { :html => 'foo', :default => 'foo', :restricted => 'foo', :basic => 'foo', :relaxed => 'foo' }, 'protocol-based JS injection: simple, spaces after' => { :html => 'foo', :default => 'foo', :restricted => 'foo', :basic => 'foo', :relaxed => 'foo' }, 'protocol-based JS injection: simple, spaces before and after' => { :html => 'foo', :default => 'foo', :restricted => 'foo', :basic => 'foo', :relaxed => 'foo' }, 'protocol-based JS injection: preceding colon' => { :html => 'foo', :default => 'foo', :restricted => 'foo', :basic => 'foo', :relaxed => 'foo' }, 'protocol-based JS injection: UTF-8 encoding' => { :html => 'foo', :default => 'foo', :restricted => 'foo', :basic => 'foo', :relaxed => 'foo' }, 'protocol-based JS injection: long UTF-8 encoding' => { :html => 'foo', :default => 'foo', :restricted => 'foo', :basic => 'foo', :relaxed => 'foo' }, 'protocol-based JS injection: long UTF-8 encoding without semicolons' => { :html => 'foo', :default => 'foo', :restricted => 'foo', :basic => 'foo', :relaxed => 'foo' }, 'protocol-based JS injection: hex encoding' => { :html => 'foo', :default => 'foo', :restricted => 'foo', :basic => 'foo', :relaxed => 'foo' }, 'protocol-based JS injection: long hex encoding' => { :html => 'foo', :default => 'foo', :restricted => 'foo', :basic => 'foo', :relaxed => 'foo' }, 'protocol-based JS injection: hex encoding without semicolons' => { :html => 'foo', :default => 'foo', :restricted => 'foo', :basic => 'foo', :relaxed => 'foo' }, 'protocol-based JS injection: null char' => { :html => "", :default => '', :restricted => '', :basic => '', :relaxed => '' }, 'protocol-based JS injection: invalid URL char' => { :html => '', :default => '', :restricted => '', :basic => '', :relaxed => '' }, 
'protocol-based JS injection: spaces and entities' => { :html => '', :default => '', :restricted => '', :basic => '', :relaxed => '' }, 'protocol whitespace' => { :html => '', :default => '', :restricted => '', :basic => '', :relaxed => '' } } describe 'Default config' do it 'should remove non-whitelisted elements, leaving safe contents behind' do Sanitize.fragment('foo bar baz quux') .must_equal 'foo bar baz quux' Sanitize.fragment('') .must_equal 'alert("<xss>");' Sanitize.fragment('<') .must_equal '< script <>> alert("");' end it 'should surround the contents of :whitespace_elements with space characters when removing the element' do Sanitize.fragment('foo
bar
baz') .must_equal 'foo bar baz' Sanitize.fragment('foo
bar
baz') .must_equal 'foo bar baz' Sanitize.fragment('foo
bar
baz') .must_equal 'foo bar baz' end it 'should not choke on several instances of the same element in a row' do Sanitize.fragment('') .must_equal '' end strings.each do |name, data| it "should clean #{name} HTML" do Sanitize.fragment(data[:html]).must_equal(data[:default]) end end protocols.each do |name, data| it "should not allow #{name}" do Sanitize.fragment(data[:html]).must_equal(data[:default]) end end end describe 'Restricted config' do before do @s = Sanitize.new(Sanitize::Config::RESTRICTED) end strings.each do |name, data| it "should clean #{name} HTML" do @s.fragment(data[:html]).must_equal(data[:restricted]) end end protocols.each do |name, data| it "should not allow #{name}" do @s.fragment(data[:html]).must_equal(data[:restricted]) end end end describe 'Basic config' do before do @s = Sanitize.new(Sanitize::Config::BASIC) end it 'should not choke on valueless attributes' do @s.fragment('foo foo bar') .must_equal 'foo foo bar' end it 'should downcase attribute names' do @s.fragment('bar') .must_equal 'bar' end strings.each do |name, data| it "should clean #{name} HTML" do @s.fragment(data[:html]).must_equal(data[:basic]) end end protocols.each do |name, data| it "should not allow #{name}" do @s.fragment(data[:html]).must_equal(data[:basic]) end end end describe 'Relaxed config' do before do @s = Sanitize.new(Sanitize::Config::RELAXED) end it 'should encode special chars in attribute values' do @s.fragment('foo') .must_equal 'foo' end strings.each do |name, data| it "should clean #{name} HTML" do @s.fragment(data[:html]).must_equal(data[:relaxed]) end end protocols.each do |name, data| it "should not allow #{name}" do @s.fragment(data[:html]).must_equal(data[:relaxed]) end end end describe 'Custom configs' do it 'should allow attributes on all elements if whitelisted under :all' do input = '

bar

' Sanitize.fragment(input).must_equal ' bar ' Sanitize.fragment(input, { :elements => ['p'], :attributes => {:all => ['class']} }).must_equal input Sanitize.fragment(input, { :elements => ['p'], :attributes => {'div' => ['class']} }).must_equal '

bar

' Sanitize.fragment(input, { :elements => ['p'], :attributes => {'p' => ['title'], :all => ['class']} }).must_equal input end it "should not allow relative URLs when relative URLs aren't whitelisted" do input = 'Link' Sanitize.fragment(input, :elements => ['a'], :attributes => {'a' => ['href']}, :protocols => {'a' => {'href' => ['http']}} ).must_equal 'Link' end it 'should allow relative URLs containing colons when the colon is not in the first path segment' do input = 'Random Page' Sanitize.fragment(input, { :elements => ['a'], :attributes => {'a' => ['href']}, :protocols => {'a' => {'href' => [:relative]}} }).must_equal input end it 'should allow relative URLs containing colons when the colon is part of an anchor' do input = 'Footnote 1' Sanitize.fragment(input, { :elements => ['a'], :attributes => {'a' => ['href']}, :protocols => {'a' => {'href' => [:relative]}} }).must_equal input input = 'Footnote 1' Sanitize.fragment(input, { :elements => ['a'], :attributes => {'a' => ['href']}, :protocols => {'a' => {'href' => [:relative]}} }).must_equal input end it 'should remove the contents of filtered nodes when :remove_contents is true' do Sanitize.fragment('foo bar
bazquux
', :remove_contents => true ).must_equal 'foo bar ' end it 'should remove the contents of specified nodes when :remove_contents is an Array of element names as strings' do Sanitize.fragment('foo bar
bazquux
', :remove_contents => ['script', 'span'] ).must_equal 'foo bar baz ' end it 'should remove the contents of specified nodes when :remove_contents is an Array of element names as symbols' do Sanitize.fragment('foo bar
bazquux
', :remove_contents => [:script, :span] ).must_equal 'foo bar baz ' end it 'should not allow arbitrary HTML5 data attributes by default' do Sanitize.fragment('', :elements => ['b'] ).must_equal '' Sanitize.fragment('', :attributes => {'b' => ['class']}, :elements => ['b'] ).must_equal '' end it 'should allow arbitrary HTML5 data attributes when the :attributes config includes :data' do s = Sanitize.new( :attributes => {'b' => [:data]}, :elements => ['b'] ) s.fragment('') .must_equal '' s.fragment('') .must_equal '' s.fragment('') .must_equal '' s.fragment('') .must_equal '' s.fragment('') .must_equal '' s.fragment('') .must_equal '' s.fragment('') .must_equal '' # Nokogiri quirk; not ideal, but harmless s.fragment('') .must_equal '' # Another annoying Nokogiri quirk. end it 'should replace whitespace_elements with configured :before and :after values' do s = Sanitize.new( :whitespace_elements => { 'p' => { :before => "\n", :after => "\n" }, 'div' => { :before => "\n", :after => "\n" }, 'br' => { :before => "\n", :after => "\n" }, } ) s.fragment('

foo

').must_equal "\nfoo\n" s.fragment('

foo

bar

').must_equal "\nfoo\n\nbar\n" s.fragment('foo
bar
baz').must_equal "foo\nbar\nbaz" s.fragment('foo
bar
baz').must_equal "foo\nbar\nbaz" end it 'handles protocols correctly regardless of case' do input = 'Text' Sanitize.fragment(input, { :elements => ['a'], :attributes => {'a' => ['href']}, :protocols => {'a' => {'href' => ['https']}} }).must_equal input input = 'Text' Sanitize.fragment(input, { :elements => ['a'], :attributes => {'a' => ['href']}, :protocols => {'a' => {'href' => ['https']}} }).must_equal "Text" end end end sanitize-4.6.6/test/test_clean_doctype.rb0000644000004100000410000000531113337503361020565 0ustar www-datawww-data# encoding: utf-8 require_relative 'common' describe 'Sanitize::Transformers::CleanDoctype' do make_my_diffs_pretty! parallelize_me! describe 'when :allow_doctype is false' do before do @s = Sanitize.new(:allow_doctype => false, :elements => ['html']) end it 'should remove doctype declarations' do @s.document('foo').must_equal "foo\n" @s.fragment('foo').must_equal 'foo' end it 'should not allow doctype definitions in fragments' do @s.fragment('foo') .must_equal "foo" @s.fragment('foo') .must_equal "foo" @s.fragment("foo") .must_equal "foo" end end describe 'when :allow_doctype is true' do before do @s = Sanitize.new(:allow_doctype => true, :elements => ['html']) end it 'should allow doctype declarations in documents' do @s.document('foo') .must_equal "\nfoo\n" @s.document('foo') .must_equal "\nfoo\n" @s.document("foo") .must_equal "\nfoo\n" end it 'should not allow obviously invalid doctype declarations in documents' do @s.document('foo') .must_equal "\nfoo\n" @s.document('foo') .must_equal "\nfoo\n" @s.document('foo') .must_equal "\nfoo\n" @s.document('foo') .must_equal "foo\n" end it 'should not allow doctype definitions in fragments' do @s.fragment('foo') .must_equal "foo" @s.fragment('foo') .must_equal "foo" @s.fragment("foo") .must_equal "foo" end end end sanitize-4.6.6/test/test_unicode.rb0000644000004100000410000000662013337503361017406 0ustar www-datawww-data# encoding: utf-8 require_relative 'common' describe 'Unicode' do 
make_my_diffs_pretty! parallelize_me! # http://www.w3.org/TR/unicode-xml/#Charlist describe 'Unsuitable characters' do before do @s = Sanitize.new(Sanitize::Config::RELAXED) end it 'should not modify the input string' do fragment = "a\u0340b\u0341c" document = "a\u0340b\u0341c" @s.document(document) @s.fragment(fragment) fragment.must_equal "a\u0340b\u0341c" document.must_equal "a\u0340b\u0341c" end it 'should strip deprecated grave and acute clones' do @s.document("a\u0340b\u0341c").must_equal "abc\n" @s.fragment("a\u0340b\u0341c").must_equal 'abc' end it 'should strip deprecated Khmer characters' do @s.document("a\u17a3b\u17d3c").must_equal "abc\n" @s.fragment("a\u17a3b\u17d3c").must_equal 'abc' end it 'should strip line and paragraph separator punctuation' do @s.document("a\u2028b\u2029c").must_equal "abc\n" @s.fragment("a\u2028b\u2029c").must_equal 'abc' end it 'should strip bidi embedding control characters' do @s.document("a\u202ab\u202bc\u202cd\u202de\u202e") .must_equal "abcde\n" @s.fragment("a\u202ab\u202bc\u202cd\u202de\u202e") .must_equal 'abcde' end it 'should strip deprecated symmetric swapping characters' do @s.document("a\u206ab\u206bc").must_equal "abc\n" @s.fragment("a\u206ab\u206bc").must_equal 'abc' end it 'should strip deprecated Arabic form shaping characters' do @s.document("a\u206cb\u206dc").must_equal "abc\n" @s.fragment("a\u206cb\u206dc").must_equal 'abc' end it 'should strip deprecated National digit shape characters' do @s.document("a\u206eb\u206fc").must_equal "abc\n" @s.fragment("a\u206eb\u206fc").must_equal 'abc' end it 'should strip interlinear annotation characters' do @s.document("a\ufff9b\ufffac\ufffb").must_equal "abc\n" @s.fragment("a\ufff9b\ufffac\ufffb").must_equal 'abc' end it 'should strip BOM/zero-width non-breaking space characters' do @s.document("a\ufeffbc").must_equal "abc\n" @s.fragment("a\ufeffbc").must_equal 'abc' end it 'should strip object replacement characters' do @s.document("a\ufffcbc").must_equal "abc\n" 
@s.fragment("a\ufffcbc").must_equal 'abc' end it 'should strip musical notation scoping characters' do @s.document("a\u{1d173}b\u{1d174}c\u{1d175}d\u{1d176}e\u{1d177}f\u{1d178}g\u{1d179}h\u{1d17a}") .must_equal "abcdefgh\n" @s.fragment("a\u{1d173}b\u{1d174}c\u{1d175}d\u{1d176}e\u{1d177}f\u{1d178}g\u{1d179}h\u{1d17a}") .must_equal 'abcdefgh' end it 'should strip language tag code point characters' do str = String.new 'a' (0xE0000..0xE007F).each {|n| str << [n].pack('U') } str << 'b' @s.document(str).must_equal "ab\n" @s.fragment(str).must_equal 'ab' end end end sanitize-4.6.6/README.md0000644000004100000410000004775513337503361014712 0ustar www-datawww-dataSanitize ======== Sanitize is a whitelist-based HTML and CSS sanitizer. Given a list of acceptable elements, attributes, and CSS properties, Sanitize will remove all unacceptable HTML and/or CSS from a string. Using a simple configuration syntax, you can tell Sanitize to allow certain HTML elements, certain attributes within those elements, and even certain URL protocols within attributes that contain URLs. You can also whitelist CSS properties, @ rules, and URL protocols you wish to allow in elements or attributes containing CSS. Any HTML or CSS that you don't explicitly allow will be removed. Sanitize is based on [Google's Gumbo HTML5 parser][gumbo], which parses HTML exactly the same way modern browsers do, and [Crass][crass], which parses CSS exactly the same way modern browsers do. As long as your whitelist config only allows safe markup and CSS, even the most malformed or malicious input will be transformed into safe output. 
[![Build Status](https://travis-ci.org/rgrove/sanitize.svg?branch=master)](https://travis-ci.org/rgrove/sanitize) [![Gem Version](https://badge.fury.io/rb/sanitize.svg)](http://badge.fury.io/rb/sanitize) [crass]:https://github.com/rgrove/crass [gumbo]:https://github.com/google/gumbo-parser Links ----- * [Home](https://github.com/rgrove/sanitize/) * [API Docs](http://rubydoc.info/github/rgrove/sanitize/master) * [Issues](https://github.com/rgrove/sanitize/issues) * [Release History](https://github.com/rgrove/sanitize/blob/master/HISTORY.md#sanitize-history) * [Online Demo](https://sanitize.herokuapp.com/) * [Biased comparison of Ruby HTML sanitization libraries](https://github.com/rgrove/sanitize/blob/master/COMPARISON.md) Installation ------------- ``` gem install sanitize ``` Quick Start ----------- ```ruby require 'sanitize' # Clean up an HTML fragment using Sanitize's permissive but safe Relaxed config. # This also sanitizes any CSS in `

hello!

] Sanitize.fragment(html, :elements => ['div', 'style'], :attributes => {'div' => ['style']}, :css => { :properties => ['width'] } ) #=> %[ # # #
# hello! # ] ``` ### Standalone CSS Sanitize will happily clean up a standalone CSS stylesheet or property string without needing to invoke the HTML parser. ```ruby css = %[ @import url(evil.css); a { text-decoration: none; } a:hover { left: expression(alert('xss!')); text-decoration: underline; } ] Sanitize::CSS.stylesheet(css, Sanitize::Config::RELAXED) # => %[ # # # # a { text-decoration: none; } # # a:hover { # # text-decoration: underline; # } # ] Sanitize::CSS.properties(%[ left: expression(alert('xss!')); text-decoration: underline; ], Sanitize::Config::RELAXED) # => %[ # # text-decoration: underline; # ] ``` Configuration ------------- In addition to the ultra-safe default settings, Sanitize comes with three other built-in configurations that you can use out of the box or adapt to meet your needs. ### Sanitize::Config::RESTRICTED Allows only very simple inline markup. No links, images, or block elements. ```ruby Sanitize.fragment(html, Sanitize::Config::RESTRICTED) # => 'foo' ``` ### Sanitize::Config::BASIC Allows a variety of markup including formatting elements, links, and lists. Images and tables are not allowed, links are limited to FTP, HTTP, HTTPS, and mailto protocols, and a `rel="nofollow"` attribute is added to all links to mitigate SEO spam. ```ruby Sanitize.fragment(html, Sanitize::Config::BASIC) # => 'foo' ``` ### Sanitize::Config::RELAXED Allows an even wider variety of markup, including images and tables, as well as safe CSS. Links are still limited to FTP, HTTP, HTTPS, and mailto protocols, while images are limited to HTTP and HTTPS. In this mode, `rel="nofollow"` is not added to links. 
```ruby Sanitize.fragment(html, Sanitize::Config::RELAXED) # => 'foo' ``` ### Custom Configuration If the built-in modes don't meet your needs, you can easily specify a custom configuration: ```ruby Sanitize.fragment(html, :elements => ['a', 'span'], :attributes => { 'a' => ['href', 'title'], 'span' => ['class'] }, :protocols => { 'a' => {'href' => ['http', 'https', 'mailto']} } ) ``` You can also start with one of Sanitize's built-in configurations and then customize it to meet your needs. The built-in configs are deeply frozen to prevent people from modifying them (either accidentally or maliciously). To customize a built-in config, create a new copy using `Sanitize::Config.merge()`, like so: ```ruby # Create a customized copy of the Basic config, adding
and to the # existing whitelisted elements. Sanitize.fragment(html, Sanitize::Config.merge(Sanitize::Config::BASIC, :elements => Sanitize::Config::BASIC[:elements] + ['div', 'table'], :remove_contents => true )) ``` The example above adds the `
<div>` and `<table>
` elements to a copy of the existing list of elements in `Sanitize::Config::BASIC`. If you instead want to completely overwrite the elements array with your own, you can omit the `+` operation: ```ruby # Overwrite :elements instead of creating a copy with new entries. Sanitize.fragment(html, Sanitize::Config.merge(Sanitize::Config::BASIC, :elements => ['div', 'table'], :remove_contents => true )) ``` ### Config Settings #### :add_attributes (Hash) Attributes to add to specific elements. If the attribute already exists, it will be replaced with the value specified here. Specify all element names and attributes in lowercase. ```ruby :add_attributes => { 'a' => {'rel' => 'nofollow'} } ``` #### :allow_comments (boolean) Whether or not to allow HTML comments. Allowing comments is strongly discouraged, since IE allows script execution within conditional comments. The default value is `false`. #### :allow_doctype (boolean) Whether or not to allow well-formed HTML doctype declarations such as "<!DOCTYPE html>" when sanitizing a document. This setting is ignored when sanitizing fragments. The default value is `false`. #### :attributes (Hash) Attributes to allow on specific elements. Specify all element names and attributes in lowercase. ```ruby :attributes => { 'a' => ['href', 'title'], 'blockquote' => ['cite'], 'img' => ['alt', 'src', 'title'] } ``` If you'd like to allow certain attributes on all elements, use the symbol `:all` instead of an element name. ```ruby # Allow the class attribute on all elements. :attributes => { :all => ['class'], 'a' => ['href', 'title'] } ``` To allow arbitrary HTML5 `data-*` attributes, use the symbol `:data` in place of an attribute name. ```ruby # Allow arbitrary HTML5 data-* attributes on <div>
elements. :attributes => { 'div' => [:data] } ``` #### :css (Hash) Hash of the following CSS config settings to be used when sanitizing CSS (either standalone or embedded in HTML). ##### :css => :allow_comments (boolean) Whether or not to allow CSS comments. The default value is `false`. ##### :css => :allow_hacks (boolean) Whether or not to allow browser compatibility hacks such as the IE `*` and `_` hacks. These are generally harmless, but technically result in invalid CSS. The default is `false`. ##### :css => :at_rules (Array or Set) Names of CSS [at-rules][at-rules] to allow that may not have associated blocks, such as `import` or `charset`. Names should be specified in lowercase. [at-rules]:https://developer.mozilla.org/en-US/docs/Web/CSS/At-rule ##### :css => :at_rules_with_properties (Array or Set) Names of CSS [at-rules][at-rules] to allow that may have associated blocks containing CSS properties. At-rules like `font-face` and `page` fall into this category. Names should be specified in lowercase. ##### :css => :at_rules_with_styles (Array or Set) Names of CSS [at-rules][at-rules] to allow that may have associated blocks containing style rules. At-rules like `media` and `keyframes` fall into this category. Names should be specified in lowercase. ##### :css => :import_url_validator This is a `Proc` (or other callable object) that will be called and passed the URL specified for any `@import` [at-rules][at-rules]. You can use this to limit what can be imported, for example something like the following to limit `@import` to Google Fonts URLs: ```ruby Proc.new { |url| url.start_with?("https://fonts.googleapis.com") } ``` ##### :css => :properties (Array or Set) Whitelist of CSS property names to allow. Names should be specified in lowercase. ##### :css => :protocols (Array or Set) URL protocols to allow in CSS URLs. Should be specified in lowercase. 
If you'd like to allow the use of relative URLs which don't have a protocol, include the symbol `:relative` in the protocol array. #### :elements (Array or Set) Array of HTML element names to allow. Specify all names in lowercase. Any elements not in this array will be removed. ```ruby :elements => %w[ a abbr b blockquote br cite code dd dfn dl dt em i kbd li mark ol p pre q s samp small strike strong sub sup time u ul var ] ``` #### :protocols (Hash) URL protocols to allow in specific attributes. If an attribute is listed here and contains a protocol other than those specified (or if it contains no protocol at all), it will be removed. ```ruby :protocols => { 'a' => {'href' => ['ftp', 'http', 'https', 'mailto']}, 'img' => {'src' => ['http', 'https']} } ``` If you'd like to allow the use of relative URLs which don't have a protocol, include the symbol `:relative` in the protocol array: ```ruby :protocols => { 'a' => {'href' => ['http', 'https', :relative]} } ``` #### :remove_contents (boolean or Array or Set) If set to `true`, Sanitize will remove the contents of any non-whitelisted elements in addition to the elements themselves. By default, Sanitize leaves the safe parts of an element's contents behind when the element is removed. If set to an array of element names, then only the contents of the specified elements (when filtered) will be removed, and the contents of all other filtered elements will be left behind. The default value is `false`. #### :transformers (Array or callable) Custom HTML transformer or array of custom transformers. See the Transformers section below for details. #### :whitespace_elements (Hash) Hash of element names which, when removed, should have their contents surrounded by whitespace to preserve readability. Each element name is a key pointing to another Hash, which provides the specific whitespace that should be inserted `:before` and `:after` the removed element's position. 
The `:after` value will only be inserted if the removed element has children, in which case it will be inserted after those children. ```ruby :whitespace_elements => { 'br' => { :before => "\n", :after => "" }, 'div' => { :before => "\n", :after => "\n" }, 'p' => { :before => "\n", :after => "\n" } } ``` The default elements with whitespace added before and after are: ``` address article aside blockquote br dd div dl dt footer h1 h2 h3 h4 h5 h6 header hgroup hr li nav ol p pre section ul ``` ## Transformers Transformers allow you to filter and modify HTML nodes using your own custom logic, on top of (or instead of) Sanitize's core filter. A transformer is any object that responds to `call()` (such as a lambda or proc). To use one or more transformers, pass them to the `:transformers` config setting. You may pass a single transformer or an array of transformers. ```ruby Sanitize.fragment(html, :transformers => [ transformer_one, transformer_two ]) ``` ### Input Each transformer's `call()` method will be called once for each node in the HTML (including elements, text nodes, comments, etc.), and will receive as an argument a Hash that contains the following items: * **:config** - The current Sanitize configuration Hash. * **:is_whitelisted** - `true` if the current node has been whitelisted by a previous transformer, `false` otherwise. It's generally bad form to remove a node that a previous transformer has whitelisted. * **:node** - A `Nokogiri::XML::Node` object representing an HTML node. The node may be an element, a text node, a comment, a CDATA node, or a document fragment. Use Nokogiri's inspection methods (`element?`, `text?`, etc.) to selectively ignore node types you aren't interested in. * **:node_name** - The name of the current HTML node, always lowercase (e.g. "div" or "span"). For non-element nodes, the name will be something like "text", "comment", "#cdata-section", "#document-fragment", etc. 
* **:node_whitelist** - Set of `Nokogiri::XML::Node` objects in the current document that have been whitelisted by previous transformers, if any. It's generally bad form to remove a node that a previous transformer has whitelisted. ### Output A transformer doesn't have to return anything, but may optionally return a Hash, which may contain the following items: * **:node_whitelist** - Array or Set of specific Nokogiri::XML::Node objects to add to the document's whitelist, bypassing the current Sanitize config. These specific nodes and all their attributes will be whitelisted, but their children will not be. If a transformer returns anything other than a Hash, the return value will be ignored. ### Processing Each transformer has full access to the `Nokogiri::XML::Node` that's passed into it and to the rest of the document via the node's `document()` method. Any changes made to the current node or to the document will be reflected instantly in the document and passed on to subsequently called transformers and to Sanitize itself. A transformer may even call Sanitize internally to perform custom sanitization if needed. Nodes are passed into transformers in the order in which they're traversed. Sanitize performs top-down traversal, meaning that nodes are traversed in the same order you'd read them in the HTML, starting at the top node, then its first child, and so on. ```ruby html = %[
  <header>
    <span>
      <strong>foo</strong>
    </span>
    <p>bar</p>
  </header>

  <footer></footer>
] transformer = lambda do |env| puts env[:node_name] if env[:node].element? end # Prints "header", "span", "strong", "p", "footer". Sanitize.fragment(html, :transformers => transformer) ``` Transformers have a tremendous amount of power, including the power to completely bypass Sanitize's built-in filtering. Be careful! Your safety is in your own hands. ### Example: Transformer to whitelist image URLs by domain The following example demonstrates how to remove image elements unless they use a relative URL or are hosted on a specific domain. It assumes that the `<img>` element and its `src` attribute are already whitelisted. ```ruby require 'uri' image_whitelist_transformer = lambda do |env| # Ignore everything except <img> elements. return unless env[:node_name] == 'img' node = env[:node] image_uri = URI.parse(node['src']) # Only allow relative URLs or URLs with the example.com domain. The # image_uri.host.nil? check ensures that protocol-relative URLs like # "//evil.com/foo.jpg" aren't allowed. unless image_uri.host == 'example.com' || (image_uri.host.nil? && image_uri.relative?) 
node.unlink # `Nokogiri::XML::Node#unlink` removes a node from the document end end ``` ### Example: Transformer to whitelist YouTube video embeds The following example demonstrates how to create a transformer that will safely whitelist valid YouTube video embeds without having to blindly allow other kinds of embedded content, which would be the case if you tried to do this by just whitelisting all ` ] Sanitize.fragment(html, :transformers => youtube_transformer) # => '' ``` License ------- Copyright (c) 2015 Ryan Grove (ryan@wonko.com) Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the 'Software'), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
sanitize-4.6.6/sanitize.gemspec0000644000004100000410000000615213337503361016610 0ustar www-datawww-data######################################################### # This file has been automatically generated by gem2tgz # ######################################################### # -*- encoding: utf-8 -*- # stub: sanitize 4.6.6 ruby lib Gem::Specification.new do |s| s.name = "sanitize".freeze s.version = "4.6.6" s.required_rubygems_version = Gem::Requirement.new(">= 1.2.0".freeze) if s.respond_to? :required_rubygems_version= s.require_paths = ["lib".freeze] s.authors = ["Ryan Grove".freeze] s.date = "2018-07-24" s.description = "Sanitize is a whitelist-based HTML and CSS sanitizer. Given a list of acceptable elements, attributes, and CSS properties, Sanitize will remove all unacceptable HTML and/or CSS from a string.".freeze s.email = "ryan@wonko.com".freeze s.files = ["HISTORY.md".freeze, "LICENSE".freeze, "README.md".freeze, "lib/sanitize.rb".freeze, "lib/sanitize/config.rb".freeze, "lib/sanitize/config/basic.rb".freeze, "lib/sanitize/config/default.rb".freeze, "lib/sanitize/config/relaxed.rb".freeze, "lib/sanitize/config/restricted.rb".freeze, "lib/sanitize/css.rb".freeze, "lib/sanitize/transformers/clean_cdata.rb".freeze, "lib/sanitize/transformers/clean_comment.rb".freeze, "lib/sanitize/transformers/clean_css.rb".freeze, "lib/sanitize/transformers/clean_doctype.rb".freeze, "lib/sanitize/transformers/clean_element.rb".freeze, "lib/sanitize/version.rb".freeze, "test/common.rb".freeze, "test/test_clean_comment.rb".freeze, "test/test_clean_css.rb".freeze, "test/test_clean_doctype.rb".freeze, "test/test_clean_element.rb".freeze, "test/test_config.rb".freeze, "test/test_malicious_css.rb".freeze, "test/test_malicious_html.rb".freeze, "test/test_parser.rb".freeze, "test/test_sanitize.rb".freeze, "test/test_sanitize_css.rb".freeze, "test/test_transformers.rb".freeze, "test/test_unicode.rb".freeze] s.homepage = "https://github.com/rgrove/sanitize/".freeze s.licenses = 
["MIT".freeze] s.required_ruby_version = Gem::Requirement.new(">= 1.9.2".freeze) s.rubygems_version = "2.5.2.1".freeze s.summary = "Whitelist-based HTML and CSS sanitizer.".freeze if s.respond_to? :specification_version then s.specification_version = 4 if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then s.add_runtime_dependency(%q.freeze, ["~> 1.0.2"]) s.add_development_dependency(%q.freeze, ["~> 5.10.2"]) s.add_runtime_dependency(%q.freeze, [">= 1.4.4"]) s.add_runtime_dependency(%q.freeze, ["~> 1.4"]) s.add_development_dependency(%q.freeze, ["~> 12.0.0"]) else s.add_dependency(%q.freeze, ["~> 1.0.2"]) s.add_dependency(%q.freeze, ["~> 5.10.2"]) s.add_dependency(%q.freeze, [">= 1.4.4"]) s.add_dependency(%q.freeze, ["~> 1.4"]) s.add_dependency(%q.freeze, ["~> 12.0.0"]) end else s.add_dependency(%q.freeze, ["~> 1.0.2"]) s.add_dependency(%q.freeze, ["~> 5.10.2"]) s.add_dependency(%q.freeze, [">= 1.4.4"]) s.add_dependency(%q.freeze, ["~> 1.4"]) s.add_dependency(%q.freeze, ["~> 12.0.0"]) end end sanitize-4.6.6/LICENSE0000644000004100000410000000205713337503361014422 0ustar www-datawww-dataCopyright (c) 2015 Ryan Grove Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the 'Software'), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. sanitize-4.6.6/lib/0000755000004100000410000000000013337503361014157 5ustar www-datawww-datasanitize-4.6.6/lib/sanitize/0000755000004100000410000000000013337503361016005 5ustar www-datawww-datasanitize-4.6.6/lib/sanitize/version.rb0000644000004100000410000000007213337503361020016 0ustar www-datawww-data# encoding: utf-8 class Sanitize VERSION = '4.6.6' end sanitize-4.6.6/lib/sanitize/transformers/0000755000004100000410000000000013337503361020532 5ustar www-datawww-datasanitize-4.6.6/lib/sanitize/transformers/clean_element.rb0000644000004100000410000001503113337503361023652 0ustar www-datawww-data# encoding: utf-8 require 'set' class Sanitize; module Transformers; class CleanElement # Matches a valid HTML5 data attribute name. The unicode ranges included here # are a conservative subset of the full range of characters that are # technically allowed, with the intent of matching the most common characters # used in data attribute names while excluding uncommon or potentially # misleading characters, or characters with the potential to be normalized # into unsafe or confusing forms. # # If you need data attr names with characters that aren't included here (such # as combining marks, full-width characters, or CJK), please consider creating # a custom transformer to validate attributes according to your needs. # # http://www.whatwg.org/specs/web-apps/current-work/multipage/elements.html#embedding-custom-non-visible-data-with-the-data-*-attributes REGEX_DATA_ATTR = /\Adata-(?!xml)[a-z_][\w.\u00E0-\u00F6\u00F8-\u017F\u01DD-\u02AF-]*\z/u # Attributes that need additional escaping on `` elements due to unsafe # libxml2 behavior. 
UNSAFE_LIBXML_ATTRS_A = Set.new(%w[ name ]) # Attributes that need additional escaping on all elements due to unsafe # libxml2 behavior. UNSAFE_LIBXML_ATTRS_GLOBAL = Set.new(%w[ action href src ]) # Mapping of original characters to escape sequences for characters that # should be escaped in attributes affected by unsafe libxml2 behavior. UNSAFE_LIBXML_ESCAPE_CHARS = { ' ' => '%20', '"' => '%22' } # Regex that matches any single character that needs to be escaped in # attributes affected by unsafe libxml2 behavior. UNSAFE_LIBXML_ESCAPE_REGEX = /[ "]/ def initialize(config) @add_attributes = config[:add_attributes] @attributes = config[:attributes].dup @elements = config[:elements] @protocols = config[:protocols] @remove_all_contents = false @remove_element_contents = Set.new @whitespace_elements = {} @attributes.each do |element_name, attrs| unless element_name == :all @attributes[element_name] = Set.new(attrs).merge(@attributes[:all] || []) end end # Backcompat: if :whitespace_elements is a Set, convert it to a hash. if config[:whitespace_elements].is_a?(Set) config[:whitespace_elements].each do |element| @whitespace_elements[element] = {:before => ' ', :after => ' '} end else @whitespace_elements = config[:whitespace_elements] end if config[:remove_contents].is_a?(Set) @remove_element_contents.merge(config[:remove_contents].map(&:to_s)) else @remove_all_contents = !!config[:remove_contents] end end def call(env) node = env[:node] return if node.type != Nokogiri::XML::Node::ELEMENT_NODE || env[:is_whitelisted] name = env[:node_name] # Delete any element that isn't in the config whitelist, unless the node has # already been deleted from the document. # # It's important that we not try to reparent the children of a node that has # already been deleted, since that seems to trigger a memory leak in # Nokogiri. unless @elements.include?(name) || node.parent.nil? # Elements like br, div, p, etc. need to be replaced with whitespace in # order to preserve readability. 
if @whitespace_elements.include?(name) node.add_previous_sibling(Nokogiri::XML::Text.new(@whitespace_elements[name][:before].to_s, node.document)) unless node.children.empty? node.add_next_sibling(Nokogiri::XML::Text.new(@whitespace_elements[name][:after].to_s, node.document)) end end unless @remove_all_contents || @remove_element_contents.include?(name) node.add_previous_sibling(node.children) end node.unlink return end attr_whitelist = @attributes[name] || @attributes[:all] if attr_whitelist.nil? # Delete all attributes from elements with no whitelisted attributes. node.attribute_nodes.each {|attr| attr.unlink } else allow_data_attributes = attr_whitelist.include?(:data) # Delete any attribute that isn't allowed on this element. node.attribute_nodes.each do |attr| attr_name = attr.name.downcase unless attr_whitelist.include?(attr_name) # The attribute isn't whitelisted. if allow_data_attributes && attr_name.start_with?('data-') # Arbitrary data attributes are allowed. If this is a data # attribute, continue. next if attr_name =~ REGEX_DATA_ATTR end # Either the attribute isn't a data attribute or arbitrary data # attributes aren't allowed. Remove the attribute. attr.unlink next end # The attribute is whitelisted. # Remove any attributes that use unacceptable protocols. if @protocols.include?(name) && @protocols[name].include?(attr_name) attr_protocols = @protocols[name][attr_name] if attr.value =~ REGEX_PROTOCOL unless attr_protocols.include?($1.downcase) attr.unlink next end else unless attr_protocols.include?(:relative) attr.unlink next end end # Leading and trailing whitespace around URLs is ignored at parse # time. Stripping it here prevents it from being escaped by the # libxml2 workaround below. attr.value = attr.value.strip end # libxml2 >= 2.9.2 doesn't escape comments within some attributes, in an # attempt to preserve server-side includes. 
This can result in XSS since # an unescaped double quote can allow an attacker to inject a # non-whitelisted attribute. # # Sanitize works around this by implementing its own escaping for # affected attributes, some of which can exist on any element and some # of which can only exist on `` elements. # # The relevant libxml2 code is here: # if UNSAFE_LIBXML_ATTRS_GLOBAL.include?(attr_name) || (name == 'a' && UNSAFE_LIBXML_ATTRS_A.include?(attr_name)) attr.value = attr.value.gsub(UNSAFE_LIBXML_ESCAPE_REGEX, UNSAFE_LIBXML_ESCAPE_CHARS) end end end # Add required attributes. if @add_attributes.include?(name) @add_attributes[name].each {|key, val| node[key] = val } end end end; end; end sanitize-4.6.6/lib/sanitize/transformers/clean_cdata.rb0000644000004100000410000000041013337503361023270 0ustar www-datawww-data# encoding: utf-8 class Sanitize; module Transformers CleanCDATA = lambda do |env| node = env[:node] if node.type == Nokogiri::XML::Node::CDATA_SECTION_NODE node.replace(Nokogiri::XML::Text.new(node.text, node.document)) end end end; end sanitize-4.6.6/lib/sanitize/transformers/clean_doctype.rb0000644000004100000410000000051013337503361023664 0ustar www-datawww-data# encoding: utf-8 class Sanitize; module Transformers CleanDoctype = lambda do |env| return if env[:is_whitelisted] node = env[:node] if node.type == Nokogiri::XML::Node::DTD_NODE if env[:config][:allow_doctype] node.name = 'html' else node.unlink end end end end; end sanitize-4.6.6/lib/sanitize/transformers/clean_comment.rb0000644000004100000410000000035413337503361023665 0ustar www-datawww-data# encoding: utf-8 class Sanitize; module Transformers CleanComment = lambda do |env| node = env[:node] if node.type == Nokogiri::XML::Node::COMMENT_NODE node.unlink unless env[:is_whitelisted] end end end; end sanitize-4.6.6/lib/sanitize/transformers/clean_css.rb0000644000004100000410000000241413337503361023012 0ustar www-datawww-dataclass Sanitize; module Transformers; module CSS # Enforces a CSS whitelist on 
the contents of `style` attributes. class CleanAttribute def initialize(sanitizer_or_config) if Sanitize::CSS === sanitizer_or_config @scss = sanitizer_or_config else @scss = Sanitize::CSS.new(sanitizer_or_config) end end def call(env) node = env[:node] return unless node.type == Nokogiri::XML::Node::ELEMENT_NODE && node.key?('style') && !env[:is_whitelisted] attr = node.attribute('style') css = @scss.properties(attr.value) if css.strip.empty? attr.unlink else attr.value = css end end end # Enforces a CSS whitelist on the contents of `