pax_global_header00006660000000000000000000000064124110377130014511gustar00rootroot0000000000000052 comment=1a806136265ba6aba3ccdeaad0d686c3e1607bd7 rails-deprecated_sanitizer-1.0.3/000077500000000000000000000000001241103771300170125ustar00rootroot00000000000000rails-deprecated_sanitizer-1.0.3/.gitignore000066400000000000000000000002711241103771300210020ustar00rootroot00000000000000*.gem *.rbc .bundle .config .yardoc Gemfile.lock InstalledFiles _yardoc coverage doc/ lib/bundler/man pkg rdoc spec/reports test/tmp test/version_tmp tmp *.bundle *.so *.o *.a mkmf.log rails-deprecated_sanitizer-1.0.3/.travis.yml000066400000000000000000000010351241103771300211220ustar00rootroot00000000000000language: ruby rvm: - 1.9.3 - 2.0.0 - 2.1 - rbx-2 - jruby - ruby-head matrix: fast_finish: true allow_failures: - rvm: jruby notifications: email: false irc: on_success: change on_failure: always channels: - "irc.freenode.org#rails-contrib" campfire: on_success: change on_failure: always rooms: - secure: "ijYUg+G13awfLIOb3kfd+UewcTuTumCsCG8lo/bYR/3phRMI6dhJujS+f69TRn6XXeS7ohg2Vhkh+3pc1GdxZo6KbsXoiyuzv6wkQPzIJwg8UNBeLEdLEk5LpeAQu29MtRQC9joyHYv5vt7UX3b/VekTVITCRxJ4B2yFTx0oxJI=" rails-deprecated_sanitizer-1.0.3/CHANGELOG.md000066400000000000000000000003341241103771300206230ustar00rootroot00000000000000## 1.0.3 * Improved support for Rails 4.2.0.beta2 and above. ## 1.0.2 * Remove warning of method redefined. ## 1.0.1 * Fix autoload issue. * Added a railtie to eager load the HTML module. ## 1.0.0 * First release rails-deprecated_sanitizer-1.0.3/Gemfile000066400000000000000000000001131241103771300203000ustar00rootroot00000000000000source 'https://rubygems.org' gem 'rails', github: 'rails/rails' gemspec rails-deprecated_sanitizer-1.0.3/LICENSE.txt000066400000000000000000000020451241103771300206360ustar00rootroot00000000000000Copyright (c) 2014 Timm MIT License Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. rails-deprecated_sanitizer-1.0.3/README.md000066400000000000000000000013771241103771300203010ustar00rootroot00000000000000# Rails::Deprecated::Sanitizer In Rails 4.2 HTML sanitization has been rewritten using a more secure library. This gem includes the old behavior shipping with Rails 4.2 and before. It is strictly provided to ease migration. It will be supported until Rails 5. To downgrade add `gem 'rails-deprecated_sanitizer'` to your Gemfile. See the Rails 4.2 upgrade guide for more information. You can read more about the new sanitization implementation here: [rails-html-sanitizer](https://github.com/rails/rails-html-sanitizer). # Reporting XSS Security Issues The code provided here deals with XSS attacks and is therefore a security concern. So if you find a security issue please follow the [regular security reporting guidelines](http://rubyonrails.org/security/). rails-deprecated_sanitizer-1.0.3/Rakefile000066400000000000000000000003031241103771300204530ustar00rootroot00000000000000require "bundler/gem_tasks" require "rake/testtask" task default: :test Rake::TestTask.new do |t| t.libs = ['test'] t.pattern = 'test/**/*_test.rb' t.warning = true t.verbose = true end rails-deprecated_sanitizer-1.0.3/lib/000077500000000000000000000000001241103771300175605ustar00rootroot00000000000000rails-deprecated_sanitizer-1.0.3/lib/rails-deprecated_sanitizer.rb000066400000000000000000000000451241103771300254040ustar00rootroot00000000000000require 'rails/deprecated_sanitizer' rails-deprecated_sanitizer-1.0.3/lib/rails/000077500000000000000000000000001241103771300206725ustar00rootroot00000000000000rails-deprecated_sanitizer-1.0.3/lib/rails/deprecated_sanitizer.rb000066400000000000000000000114401241103771300254070ustar00rootroot00000000000000require "rails/deprecated_sanitizer/version" require "rails/deprecated_sanitizer/html-scanner" require "rails/deprecated_sanitizer/railtie" if defined?(Rails::Railtie) require "active_support/core_ext/module/remove_method" module Rails module DeprecatedSanitizer extend self def full_sanitizer HTML::FullSanitizer end def link_sanitizer HTML::LinkSanitizer end def white_list_sanitizer HTML::WhiteListSanitizer end end end module ActionView module Helpers module SanitizeHelper module ClassMethods redefine_method :sanitizer_vendor do Rails::DeprecatedSanitizer end redefine_method :sanitized_protocol_separator do white_list_sanitizer.protocol_separator end redefine_method :sanitized_uri_attributes do white_list_sanitizer.uri_attributes end redefine_method :sanitized_bad_tags do white_list_sanitizer.bad_tags end redefine_method :sanitized_allowed_css_properties do white_list_sanitizer.allowed_css_properties end redefine_method :sanitized_allowed_css_keywords do white_list_sanitizer.allowed_css_keywords end redefine_method :sanitized_shorthand_css_properties do white_list_sanitizer.shorthand_css_properties end redefine_method :sanitized_allowed_protocols do white_list_sanitizer.allowed_protocols end redefine_method :sanitized_protocol_separator= do |value| white_list_sanitizer.protocol_separator = value end # Adds valid HTML attributes that the +sanitize+ helper checks for URIs. # # class Application < Rails::Application # config.action_view.sanitized_uri_attributes = 'lowsrc', 'target' # end # redefine_method :sanitized_uri_attributes= do |attributes| HTML::WhiteListSanitizer.uri_attributes.merge(attributes) end # Adds to the Set of 'bad' tags for the +sanitize+ helper. # # class Application < Rails::Application # config.action_view.sanitized_bad_tags = 'embed', 'object' # end # redefine_method :sanitized_bad_tags= do |attributes| HTML::WhiteListSanitizer.bad_tags.merge(attributes) end # Adds to the Set of allowed tags for the +sanitize+ helper. # # class Application < Rails::Application # config.action_view.sanitized_allowed_tags = 'table', 'tr', 'td' # end # redefine_method :sanitized_allowed_tags= do |attributes| HTML::WhiteListSanitizer.allowed_tags.merge(attributes) end # Adds to the Set of allowed HTML attributes for the +sanitize+ helper. # # class Application < Rails::Application # config.action_view.sanitized_allowed_attributes = ['onclick', 'longdesc'] # end # redefine_method :sanitized_allowed_attributes= do |attributes| HTML::WhiteListSanitizer.allowed_attributes.merge(attributes) end # Adds to the Set of allowed CSS properties for the #sanitize and +sanitize_css+ helpers. # # class Application < Rails::Application # config.action_view.sanitized_allowed_css_properties = 'expression' # end # redefine_method :sanitized_allowed_css_properties= do |attributes| HTML::WhiteListSanitizer.allowed_css_properties.merge(attributes) end # Adds to the Set of allowed CSS keywords for the +sanitize+ and +sanitize_css+ helpers. # # class Application < Rails::Application # config.action_view.sanitized_allowed_css_keywords = 'expression' # end # redefine_method :sanitized_allowed_css_keywords= do |attributes| HTML::WhiteListSanitizer.allowed_css_keywords.merge(attributes) end # Adds to the Set of allowed shorthand CSS properties for the +sanitize+ and +sanitize_css+ helpers. # # class Application < Rails::Application # config.action_view.sanitized_shorthand_css_properties = 'expression' # end # redefine_method :sanitized_shorthand_css_properties= do |attributes| HTML::WhiteListSanitizer.shorthand_css_properties.merge(attributes) end # Adds to the Set of allowed protocols for the +sanitize+ helper. # # class Application < Rails::Application # config.action_view.sanitized_allowed_protocols = 'ssh', 'feed' # end # redefine_method :sanitized_allowed_protocols= do |attributes| HTML::WhiteListSanitizer.allowed_protocols.merge(attributes) end end end end end rails-deprecated_sanitizer-1.0.3/lib/rails/deprecated_sanitizer/000077500000000000000000000000001241103771300250625ustar00rootroot00000000000000rails-deprecated_sanitizer-1.0.3/lib/rails/deprecated_sanitizer/html-scanner.rb000066400000000000000000000012211241103771300277760ustar00rootroot00000000000000require 'active_support/dependencies/autoload' $LOAD_PATH.unshift "#{File.dirname(__FILE__)}/html-scanner" module HTML extend ActiveSupport::Autoload eager_autoload do autoload :CDATA, 'html/node' autoload :Document, 'html/document' autoload :FullSanitizer, 'html/sanitizer' autoload :LinkSanitizer, 'html/sanitizer' autoload :Node, 'html/node' autoload :Sanitizer, 'html/sanitizer' autoload :Selector, 'html/selector' autoload :Tag, 'html/node' autoload :Text, 'html/node' autoload :Tokenizer, 'html/tokenizer' autoload :Version, 'html/version' autoload :WhiteListSanitizer, 'html/sanitizer' end end rails-deprecated_sanitizer-1.0.3/lib/rails/deprecated_sanitizer/html-scanner/000077500000000000000000000000001241103771300274555ustar00rootroot00000000000000rails-deprecated_sanitizer-1.0.3/lib/rails/deprecated_sanitizer/html-scanner/html/000077500000000000000000000000001241103771300304215ustar00rootroot00000000000000rails-deprecated_sanitizer-1.0.3/lib/rails/deprecated_sanitizer/html-scanner/html/document.rb000066400000000000000000000045741241103771300325760ustar00rootroot00000000000000require 'html/tokenizer' require 'html/node' require 'html/selector' require 'html/sanitizer' module HTML #:nodoc: # A top-level HTML document. You give it a body of text, and it will parse that # text into a tree of nodes. class Document #:nodoc: # The root of the parsed document. attr_reader :root # Create a new Document from the given text. def initialize(text, strict=false, xml=false) tokenizer = Tokenizer.new(text) @root = Node.new(nil) node_stack = [ @root ] while token = tokenizer.next node = Node.parse(node_stack.last, tokenizer.line, tokenizer.position, token, strict) node_stack.last.children << node unless node.tag? && node.closing == :close if node.tag? if node_stack.length > 1 && node.closing == :close if node_stack.last.name == node.name if node_stack.last.children.empty? node_stack.last.children << Text.new(node_stack.last, node.line, node.position, "") end node_stack.pop else open_start = node_stack.last.position - 20 open_start = 0 if open_start < 0 close_start = node.position - 20 close_start = 0 if close_start < 0 msg = < hash } unless Hash === hash hash = keys_to_symbols(hash) hash.each do |k,v| case k when :tag, :content then # keys are valid, and require no further processing when :attributes then hash[k] = keys_to_strings(v) when :parent, :child, :ancestor, :descendant, :sibling, :before, :after hash[k] = Conditions.new(v) when :children hash[k] = v = keys_to_symbols(v) v.each do |key,value| case key when :count, :greater_than, :less_than # keys are valid, and require no further processing when :only v[key] = Conditions.new(value) else raise "illegal key #{key.inspect} => #{value.inspect}" end end else raise "illegal key #{k.inspect} => #{v.inspect}" end end update hash end private def keys_to_strings(hash) Hash[hash.keys.map {|k| [k.to_s, hash[k]]}] end def keys_to_symbols(hash) Hash[hash.keys.map do |k| raise "illegal key #{k.inspect}" unless k.respond_to?(:to_sym) [k.to_sym, hash[k]] end] end end # The base class of all nodes, textual and otherwise, in an HTML document. class Node #:nodoc: # The array of children of this node. Not all nodes have children. attr_reader :children # The parent node of this node. All nodes have a parent, except for the # root node. attr_reader :parent # The line number of the input where this node was begun attr_reader :line # The byte position in the input where this node was begun attr_reader :position # Create a new node as a child of the given parent. def initialize(parent, line=0, pos=0) @parent = parent @children = [] @line, @position = line, pos end # Returns a textual representation of the node. def to_s @children.join() end # Returns false (subclasses must override this to provide specific matching # behavior.) +conditions+ may be of any type. def match(conditions) false end # Search the children of this node for the first node for which #find # returns non +nil+. Returns the result of the #find call that succeeded. def find(conditions) conditions = validate_conditions(conditions) @children.each do |child| node = child.find(conditions) return node if node end nil end # Search for all nodes that match the given conditions, and return them # as an array. def find_all(conditions) conditions = validate_conditions(conditions) matches = [] matches << self if match(conditions) @children.each do |child| matches.concat child.find_all(conditions) end matches end # Returns +false+. Subclasses may override this if they define a kind of # tag. def tag? false end def validate_conditions(conditions) Conditions === conditions ? conditions : Conditions.new(conditions) end def ==(node) return false unless self.class == node.class && children.size == node.children.size equivalent = true children.size.times do |i| equivalent &&= children[i] == node.children[i] end equivalent end class </) if strict raise "expected ]]> (got #{scanner.rest.inspect} for #{content})" else scanner.skip_until(/\Z/) end end return CDATA.new(parent, line, pos, scanner.pre_match.gsub(/\/]+/) name.downcase! unless closing scanner.skip(/\s*/) attributes = {} while attr = scanner.scan(/[-\w:]+/) value = true if scanner.scan(/\s*=\s*/) if delim = scanner.scan(/['"]/) value = "" while text = scanner.scan(/[^#{delim}\\]+|./) case text when "\\" then value << text break if scanner.eos? value << scanner.getch when delim break else value << text end end else value = scanner.scan(/[^\s>\/]+/) end end attributes[attr.downcase] = value scanner.skip(/\s*/) end closing = ( scanner.scan(/\//) ? :self : nil ) end unless scanner.scan(/\s*>/) if strict raise "expected > (got #{scanner.rest.inspect} for #{content}, #{attributes.inspect})" else # throw away all text until we find what we're looking for scanner.skip_until(/>/) or scanner.terminate end end Tag.new(parent, line, pos, name, attributes, closing) end end end end # A node that represents text, rather than markup. class Text < Node #:nodoc: attr_reader :content # Creates a new text node as a child of the given parent, with the given # content. def initialize(parent, line, pos, content) super(parent, line, pos) @content = content end # Returns the content of this node. def to_s @content end # Returns +self+ if this node meets the given conditions. Text nodes support # conditions of the following kinds: # # * if +conditions+ is a string, it must be a substring of the node's # content # * if +conditions+ is a regular expression, it must match the node's # content # * if +conditions+ is a hash, it must contain a :content key that # is either a string or a regexp, and which is interpreted as described # above. def find(conditions) match(conditions) && self end # Returns non-+nil+ if this node meets the given conditions, or +nil+ # otherwise. See the discussion of #find for the valid conditions. def match(conditions) case conditions when String @content == conditions when Regexp @content =~ conditions when Hash conditions = validate_conditions(conditions) # Text nodes only have :content, :parent, :ancestor unless (conditions.keys - [:content, :parent, :ancestor]).empty? return false end match(conditions[:content]) else nil end end def ==(node) return false unless super content == node.content end end # A CDATA node is simply a text node with a specialized way of displaying # itself. class CDATA < Text #:nodoc: def to_s "" end end # A Tag is any node that represents markup. It may be an opening tag, a # closing tag, or a self-closing tag. It has a name, and may have a hash of # attributes. class Tag < Node #:nodoc: # Either +nil+, :close, or :self attr_reader :closing # Either +nil+, or a hash of attributes for this node. attr_reader :attributes # The name of this tag. attr_reader :name # Create a new node as a child of the given parent, using the given content # to describe the node. It will be parsed and the node name, attributes and # closing status extracted. def initialize(parent, line, pos, name, attributes, closing) super(parent, line, pos) @name = name @attributes = attributes @closing = closing end # A convenience for obtaining an attribute of the node. Returns +nil+ if # the node has no attributes. def [](attr) @attributes ? @attributes[attr] : nil end # Returns non-+nil+ if this tag can contain child nodes. def childless?(xml = false) return false if xml && @closing.nil? !@closing.nil? || @name =~ /^(img|br|hr|link|meta|area|base|basefont| col|frame|input|isindex|param)$/ox end # Returns a textual representation of the node def to_s if @closing == :close "" else s = "<#{@name}" @attributes.each do |k,v| s << " #{k}" s << "=\"#{v}\"" if String === v end s << " /" if @closing == :self s << ">" @children.each { |child| s << child.to_s } s << "" if @closing != :self && !@children.empty? s end end # If either the node or any of its children meet the given conditions, the # matching node is returned. Otherwise, +nil+ is returned. (See the # description of the valid conditions in the +match+ method.) def find(conditions) match(conditions) && self || super end # Returns +true+, indicating that this node represents an HTML tag. def tag? true end # Returns +true+ if the node meets any of the given conditions. The # +conditions+ parameter must be a hash of any of the following keys # (all are optional): # # * :tag: the node name must match the corresponding value # * :attributes: a hash. The node's values must match the # corresponding values in the hash. # * :parent: a hash. The node's parent must match the # corresponding hash. # * :child: a hash. At least one of the node's immediate children # must meet the criteria described by the hash. # * :ancestor: a hash. At least one of the node's ancestors must # meet the criteria described by the hash. # * :descendant: a hash. At least one of the node's descendants # must meet the criteria described by the hash. # * :sibling: a hash. At least one of the node's siblings must # meet the criteria described by the hash. # * :after: a hash. The node must be after any sibling meeting # the criteria described by the hash, and at least one sibling must match. # * :before: a hash. The node must be before any sibling meeting # the criteria described by the hash, and at least one sibling must match. # * :children: a hash, for counting children of a node. Accepts the # keys: # ** :count: either a number or a range which must equal (or # include) the number of children that match. # ** :less_than: the number of matching children must be less than # this number. # ** :greater_than: the number of matching children must be # greater than this number. # ** :only: another hash consisting of the keys to use # to match on the children, and only matching children will be # counted. # # Conditions are matched using the following algorithm: # # * if the condition is a string, it must be a substring of the value. # * if the condition is a regexp, it must match the value. # * if the condition is a number, the value must match number.to_s. # * if the condition is +true+, the value must not be +nil+. # * if the condition is +false+ or +nil+, the value must be +nil+. # # Usage: # # # test if the node is a "span" tag # node.match tag: "span" # # # test if the node's parent is a "div" # node.match parent: { tag: "div" } # # # test if any of the node's ancestors are "table" tags # node.match ancestor: { tag: "table" } # # # test if any of the node's immediate children are "em" tags # node.match child: { tag: "em" } # # # test if any of the node's descendants are "strong" tags # node.match descendant: { tag: "strong" } # # # test if the node has between 2 and 4 span tags as immediate children # node.match children: { count: 2..4, only: { tag: "span" } } # # # get funky: test to see if the node is a "div", has a "ul" ancestor # # and an "li" parent (with "class" = "enum"), and whether or not it has # # a "span" descendant that contains # text matching /hello world/: # node.match tag: "div", # ancestor: { tag: "ul" }, # parent: { tag: "li", # attributes: { class: "enum" } }, # descendant: { tag: "span", # child: /hello world/ } def match(conditions) conditions = validate_conditions(conditions) # check content of child nodes if conditions[:content] if children.empty? return false unless match_condition("", conditions[:content]) else return false unless children.find { |child| child.match(conditions[:content]) } end end # test the name return false unless match_condition(@name, conditions[:tag]) if conditions[:tag] # test attributes (conditions[:attributes] || {}).each do |key, value| return false unless match_condition(self[key], value) end # test parent return false unless parent.match(conditions[:parent]) if conditions[:parent] # test children return false unless children.find { |child| child.match(conditions[:child]) } if conditions[:child] # test ancestors if conditions[:ancestor] return false unless catch :found do p = self throw :found, true if p.match(conditions[:ancestor]) while p = p.parent end end # test descendants if conditions[:descendant] return false unless children.find do |child| # test the child child.match(conditions[:descendant]) || # test the child's descendants child.match(:descendant => conditions[:descendant]) end end # count children if opts = conditions[:children] matches = children.select do |c| (c.kind_of?(HTML::Tag) and (c.closing == :self or ! c.childless?)) end matches = matches.select { |c| c.match(opts[:only]) } if opts[:only] opts.each do |key, value| next if key == :only case key when :count if Integer === value return false if matches.length != value else return false unless value.include?(matches.length) end when :less_than return false unless matches.length < value when :greater_than return false unless matches.length > value else raise "unknown count condition #{key}" end end end # test siblings if conditions[:sibling] || conditions[:before] || conditions[:after] siblings = parent ? parent.children : [] self_index = siblings.index(self) if conditions[:sibling] return false unless siblings.detect do |s| s != self && s.match(conditions[:sibling]) end end if conditions[:before] return false unless siblings[self_index+1..-1].detect do |s| s != self && s.match(conditions[:before]) end end if conditions[:after] return false unless siblings[0,self_index].detect do |s| s != self && s.match(conditions[:after]) end end end true end def ==(node) return false unless super return false unless closing == node.closing && self.name == node.name attributes == node.attributes end private # Match the given value to the given condition. def match_condition(value, condition) case condition when String value && value == condition when Regexp value && value.match(condition) when Numeric value == condition.to_s when true !value.nil? when false, nil value.nil? else false end end end end rails-deprecated_sanitizer-1.0.3/lib/rails/deprecated_sanitizer/html-scanner/html/sanitizer.rb000066400000000000000000000164771241103771300327750ustar00rootroot00000000000000require 'set' require 'cgi' require 'active_support/core_ext/module/attribute_accessors' module HTML class Sanitizer def sanitize(text, options = {}) validate_options(options) return text unless sanitizeable?(text) tokenize(text, options).join end def sanitizeable?(text) !(text.nil? || text.empty? || !text.index("<")) end protected def tokenize(text, options) tokenizer = HTML::Tokenizer.new(text) result = [] while token = tokenizer.next node = Node.parse(nil, 0, 0, token, false) process_node node, result, options end result end def process_node(node, result, options) result << node.to_s end def validate_options(options) if options[:tags] && !options[:tags].is_a?(Enumerable) raise ArgumentError, "You should pass :tags as an Enumerable" end if options[:attributes] && !options[:attributes].is_a?(Enumerable) raise ArgumentError, "You should pass :attributes as an Enumerable" end end end class FullSanitizer < Sanitizer def sanitize(text, options = {}) result = super # strip any comments, and if they have a newline at the end (ie. line with # only a comment) strip that too result = result.gsub(/[\n]?/m, "") if (result && result =~ /[\n]?/m) # Recurse - handle all dirty nested tags result == text ? result : sanitize(result, options) end def process_node(node, result, options) result << node.to_s if node.class == HTML::Text end end class LinkSanitizer < FullSanitizer cattr_accessor :included_tags, :instance_writer => false self.included_tags = Set.new(%w(a href)) def sanitizeable?(text) !(text.nil? || text.empty? || !((text.index(""))) end protected def process_node(node, result, options) result << node.to_s unless node.is_a?(HTML::Tag) && included_tags.include?(node.name) end end class WhiteListSanitizer < Sanitizer [:protocol_separator, :uri_attributes, :allowed_attributes, :allowed_tags, :allowed_protocols, :bad_tags, :allowed_css_properties, :allowed_css_keywords, :shorthand_css_properties].each do |attr| class_attribute attr, :instance_writer => false end # A regular expression of the valid characters used to separate protocols like # the ':' in 'http://foo.com' self.protocol_separator = /:|(�*58)|(p)|(�*3a)|(%|%)3A/i # Specifies a Set of HTML attributes that can have URIs. self.uri_attributes = Set.new(%w(href src cite action longdesc xlink:href lowsrc)) # Specifies a Set of 'bad' tags that the #sanitize helper will remove completely, as opposed # to just escaping harmless tags like <font> self.bad_tags = Set.new(%w(script)) # Specifies the default Set of tags that the #sanitize helper will allow unscathed. self.allowed_tags = Set.new(%w(strong em b i p code pre tt samp kbd var sub sup dfn cite big small address hr br div span h1 h2 h3 h4 h5 h6 ul ol li dl dt dd abbr acronym a img blockquote del ins)) # Specifies the default Set of html attributes that the #sanitize helper will leave # in the allowed tag. self.allowed_attributes = Set.new(%w(href src width height alt cite datetime title class name xml:lang abbr)) # Specifies the default Set of acceptable css properties that #sanitize and #sanitize_css will accept. self.allowed_protocols = Set.new(%w(ed2k ftp http https irc mailto news gopher nntp telnet webcal xmpp callto feed svn urn aim rsync tag ssh sftp rtsp afs)) # Specifies the default Set of acceptable css properties that #sanitize and #sanitize_css will accept. self.allowed_css_properties = Set.new(%w(azimuth background-color border-bottom-color border-collapse border-color border-left-color border-right-color border-top-color clear color cursor direction display elevation float font font-family font-size font-style font-variant font-weight height letter-spacing line-height overflow pause pause-after pause-before pitch pitch-range richness speak speak-header speak-numeral speak-punctuation speech-rate stress text-align text-decoration text-indent unicode-bidi vertical-align voice-family volume white-space width)) # Specifies the default Set of acceptable css keywords that #sanitize and #sanitize_css will accept. self.allowed_css_keywords = Set.new(%w(auto aqua black block blue bold both bottom brown center collapse dashed dotted fuchsia gray green !important italic left lime maroon medium none navy normal nowrap olive pointer purple red right solid silver teal top transparent underline white yellow)) # Specifies the default Set of allowed shorthand css properties for the #sanitize and #sanitize_css helpers. self.shorthand_css_properties = Set.new(%w(background border margin padding)) # Sanitizes a block of css code. Used by #sanitize when it comes across a style attribute def sanitize_css(style) # disallow urls style = style.to_s.gsub(/url\s*\(\s*[^\s)]+?\s*\)\s*/, ' ') # gauntlet if style !~ /\A([:,;#%.\sa-zA-Z0-9!]|\w-\w|\'[\s\w]+\'|\"[\s\w]+\"|\([\d,\s]+\))*\z/ || style !~ /\A(\s*[-\w]+\s*:\s*[^:;]*(;|$)\s*)*\z/ return '' end clean = [] style.scan(/([-\w]+)\s*:\s*([^:;]*)/) do |prop,val| if allowed_css_properties.include?(prop.downcase) clean << prop + ': ' + val + ';' elsif shorthand_css_properties.include?(prop.split('-')[0].downcase) unless val.split().any? do |keyword| !allowed_css_keywords.include?(keyword) && keyword !~ /\A(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)\z/ end clean << prop + ': ' + val + ';' end end end clean.join(' ') end protected def tokenize(text, options) options[:parent] = [] options[:attributes] ||= allowed_attributes options[:tags] ||= allowed_tags super end def process_node(node, result, options) result << case node when HTML::Tag if node.closing == :close options[:parent].shift else options[:parent].unshift node.name end process_attributes_for node, options options[:tags].include?(node.name) ? node : nil else bad_tags.include?(options[:parent].first) ? nil : node.to_s.gsub(//login. # # === Matching Elements # # Use the #match method to determine if an element matches the selector. # # For simple selectors, the method returns an array with that element, # or +nil+ if the element does not match. For complex selectors (see below) # the method returns an array with all matched elements, of +nil+ if no # match found. # # For example: # if selector.match(element) # puts "Element is a login form" # end # # === Selecting Elements # # Use the #select method to select all matching elements starting with # one element and going through all children in depth-first order. # # This method returns an array of all matching elements, an empty array # if no match is found # # For example: # selector = HTML::Selector.new "input[type=text]" # matches = selector.select(element) # matches.each do |match| # puts "Found text field with name #{match.attributes['name']}" # end # # === Expressions # # Selectors can match elements using any of the following criteria: # * name -- Match an element based on its name (tag name). # For example, p to match a paragraph. You can use * # to match any element. # * #id -- Match an element based on its identifier (the # id attribute). For example, #page. # * .class -- Match an element based on its class name, all # class names if more than one specified. # * [attr] -- Match an element that has the specified attribute. # * [attr=value] -- Match an element that has the specified # attribute and value. (More operators are supported see below) # * :pseudo-class -- Match an element based on a pseudo class, # such as :nth-child and :empty. # * :not(expr) -- Match an element that does not match the # negation expression. # # When using a combination of the above, the element name comes first # followed by identifier, class names, attributes, pseudo classes and # negation in any order. Do not separate these parts with spaces! # Space separation is used for descendant selectors. # # For example: # selector = HTML::Selector.new "form.login[action=/login]" # The matched element must be of type +form+ and have the class +login+. # It may have other classes, but the class +login+ is required to match. # It must also have an attribute called +action+ with the value # /login. # # This selector will match the following element: #