mechanize-2.7.5/0000755000004100000410000000000012772546476013544 5ustar www-datawww-datamechanize-2.7.5/Rakefile0000644000004100000410000000211712772546476015212 0ustar www-datawww-datarequire 'rubygems' begin require "bundler/gem_tasks" rescue LoadError end require 'rdoc/task' require 'rake/testtask' task :prerelease => [:clobber_rdoc, :test] desc "Update SSL Certificate" task('ssl_cert') do |p| sh "openssl genrsa -des3 -out server.key 1024" sh "openssl req -new -key server.key -out server.csr" sh "cp server.key server.key.org" sh "openssl rsa -in server.key.org -out server.key" sh "openssl x509 -req -days 365 -in server.csr -signkey server.key -out server.crt" sh "cp server.key server.pem" sh "mv server.key server.csr server.crt server.pem test/data/" sh "rm server.key.org" end RDoc::Task.new do |rdoc| rdoc.main = "README.rdoc" rdoc.rdoc_dir = 'doc' rdoc.rdoc_files.include( "CHANGELOG.rdoc", "EXAMPLES.rdoc", "GUIDE.rdoc", "LICENSE.rdoc", "README.rdoc", "lib/**/*.rb") end desc "Run tests" Rake::TestTask.new { |t| t.test_files = Dir['test/**/test*.rb'] t.verbose = true } task publish_docs: %w[rdoc] do sh 'rsync', '-avzO', '--delete', 'doc/', 'docs-push.seattlerb.org:/data/www/docs.seattlerb.org/mechanize/' end task default: :test mechanize-2.7.5/EXAMPLES.rdoc0000644000004100000410000001207712772546476015642 0ustar www-datawww-data= Mechanize examples Note: Several examples show methods chained to the end of do/end blocks. do...end is the same as curly braces ({...}). For example, do ... end.submit is the same as { ... }.submit. 
== Google require 'rubygems' require 'mechanize' a = Mechanize.new { |agent| agent.user_agent_alias = 'Mac Safari' } a.get('http://google.com/') do |page| search_result = page.form_with(:id => 'gbqf') do |search| search.q = 'Hello world' end.submit search_result.links.each do |link| puts link.text end end == Rubyforge require 'rubygems' require 'mechanize' a = Mechanize.new a.get('http://rubyforge.org/') do |page| # Click the login link login_page = a.click(page.link_with(:text => /Log In/)) # Submit the login form my_page = login_page.form_with(:action => '/account/login.php') do |f| f.form_loginname = ARGV[0] f.form_pw = ARGV[1] end.click_button my_page.links.each do |link| text = link.text.strip next unless text.length > 0 puts text end end == File Upload Upload a file to flickr. require 'rubygems' require 'mechanize' abort "#{$0} login passwd filename" if (ARGV.size != 3) a = Mechanize.new { |agent| # Flickr refreshes after login agent.follow_meta_refresh = true } a.get('http://flickr.com/') do |home_page| signin_page = a.click(home_page.link_with(:text => /Sign In/)) my_page = signin_page.form_with(:name => 'login_form') do |form| form.login = ARGV[0] form.passwd = ARGV[1] end.submit # Click the upload link upload_page = a.click(my_page.link_with(:text => /Upload/)) # We want the basic upload page. upload_page = a.click(upload_page.link_with(:text => /basic Uploader/)) # Upload the file upload_page.form_with(:method => 'POST') do |upload_form| upload_form.file_uploads.first.file_name = ARGV[2] end.submit end == Pluggable Parsers Let's say you want HTML pages to automatically be parsed with Rubyful Soup. 
This example shows you how: require 'rubygems' require 'mechanize' require 'rubyful_soup' class SoupParser < Mechanize::Page attr_reader :soup def initialize(uri = nil, response = nil, body = nil, code = nil) @soup = BeautifulSoup.new(body) super(uri, response, body, code) end end agent = Mechanize.new agent.pluggable_parser.html = SoupParser Now all HTML pages will be parsed with the SoupParser class, and automatically give you access to a method called 'soup' where you can get access to the Beautiful Soup for that page. == Using a proxy require 'rubygems' require 'mechanize' agent = Mechanize.new agent.set_proxy 'localhost', 8000 page = agent.get(ARGV[0]) puts page.body == The transact method Mechanize#transact runs the given block and then resets the page history. I.e. after the block has been executed, you're back at the original page; no need to count how many times to call the back method at the end of a loop (while accounting for possible exceptions). This example also demonstrates subclassing Mechanize. require 'rubygems' require 'mechanize' class TestMech < Mechanize def process get 'http://rubyforge.org/' search_form = page.forms.first search_form.words = 'WWW' submit search_form page.links_with(:href => %r{/projects/} ).each do |link| next if link.href =~ %r{/projects/support/} puts 'Loading %-30s %s' % [link.href, link.text] begin transact do click link # Do stuff, maybe click more links. end # Now we're back at the original page. rescue => e $stderr.puts "#{e.class}: #{e.message}" end end end end TestMech.new.process == Client Certificate Authentication (Mutual Auth) In most cases a client certificate is created as an additional layer of security for certain websites. The specific case that this was initially tested on was for automating the download of archived images from a banks (Wachovia) lockbox system. Once the certificate is installed into your browser you will have to export it and split the certificate and private key into separate files. 
require 'rubygems' require 'mechanize' # create Mechanize instance agent = Mechanize.new # set the path of the certificate file agent.cert = 'example.cer' # set the path of the private key file agent.key = 'example.key' # get the login form & fill it out with the username/password login_form = agent.get("http://example.com/login_page").form('Login') login_form.Userid = 'TestUser' login_form.Password = 'TestPassword' # submit login form agent.submit(login_form, login_form.buttons.first) Exported files are usually in .p12 format (IE 7 & Firefox 2.0) which stands for PKCS #12. You can convert them from p12 to pem format by using the following commands: openssl pkcs12 -in input_file.p12 -clcerts -out example.key -nocerts -nodes openssl pkcs12 -in input_file.p12 -clcerts -out example.cer -nokeys mechanize-2.7.5/Gemfile0000644000004100000410000000004712772546476015040 0ustar www-datawww-datasource 'https://rubygems.org' gemspec mechanize-2.7.5/examples/0000755000004100000410000000000012772546476015362 5ustar www-datawww-datamechanize-2.7.5/examples/flickr_upload.rb0000644000004100000410000000100612772546476020522 0ustar www-datawww-datarequire 'rubygems' require 'mechanize' agent = Mechanize.new # Get the flickr sign in page page = agent.get 'http://flickr.com/signin/flickr/' # Fill out the login form form = page.form_with :name => 'flickrloginform' form.email = ARGV[0] form.password = ARGV[1] form.submit # Go to the upload page page = page.link_with(:text => 'Upload').click # Fill out the form form = page.forms.action('/photos_upload_process.gne').first form.file_uploads.name('file1').first.file_name = ARGV[2] form.submit mechanize-2.7.5/examples/wikipedia_links_to_philosophy.rb0000644000004100000410000000617712772546476024050 0ustar www-datawww-datarequire 'mechanize' require 'tsort' ## # This example implements the alt-text of http://xkcd.com/903/ which states: # # Wikipedia trivia: if you take any article, click on the first link in the # article text not in 
parentheses or italics, and then repeat, you will # eventually end up at "Philosophy". class WikipediaLinksToPhilosophy def initialize @agent = Mechanize.new @agent.user_agent_alias = 'Mac Safari' # Wikipedia blocks "mechanize" @history = @agent.history @wiki_url = URI 'http://en.wikipedia.org' @search_url = @wiki_url + '/w/index.php' @random_url = @wiki_url + '/wiki/Special:Random' @title = nil @seen = nil end ## # Retrieves the title of the current page def extract_title @page.title =~ /(.*) - Wikipedia/ @title = $1 end ## # Retrieves the initial page. If +query+ is not given a random page is # chosen def fetch_first_page query if query then search query else random end end ## # The search is finished if we've seen the page before or we've reached # Philosophy def finished? @seen or @title == 'Philosophy' end ## # Follows the first non-parenthetical, non-italic link in the main body of # the article. def follow_first_link puts @title # > p > a rejects italics links = @page.root.css('.mw-content-ltr > p > a[href^="/wiki/"]') # reject disambiguation and special pages, images and files links = links.reject do |link_node| link_node['href'] =~ %r%/wiki/\w+:|\(disambiguation\)% end links = links.reject do |link_node| in_parenthetical? link_node end link = links.first unless link then # disambiguation page? try the first item in the list link = @page.root.css('.mw-content-ltr > ul > li > a[href^="/wiki/"]').first end # convert a Nokogiri HTML element back to a mechanize link link = Mechanize::Page::Link.new link, @agent, @page return if @seen = @agent.visited?(link) @page = link.click extract_title end ## # Is +link_node+ in an open parenthetical section? def in_parenthetical? 
link_node siblings = link_node.parent.children seen = false before = siblings.reject do |node| seen or (seen = node == link_node) end preceding_text = before.map { |node| node.text }.join open = preceding_text.count '(' close = preceding_text.count ')' open > close end ## # Prints the result of the search def print_result if @seen then puts "[Loop detected]" else puts @title end puts # subtract initial search or Special:Random puts "After #{@agent.history.length - 1} pages" end ## # Retrieves a random page from wikipedia def random @page = @agent.get @random_url extract_title end ## # Entry point def run query = nil fetch_first_page query follow_first_link until finished? print_result end ## # Searches for +query+ on wikipedia def search query @page = @agent.get @search_url, search: query extract_title end end WikipediaLinksToPhilosophy.new.run ARGV.shift if $0 == __FILE__ mechanize-2.7.5/examples/spider.rb0000644000004100000410000000072512772546476017201 0ustar www-datawww-datarequire 'rubygems' require 'mechanize' agent = Mechanize.new agent.max_history = nil # unlimited history stack = agent.get(ARGV[0]).links while l = stack.pop next unless l.uri host = l.uri.host next unless host.nil? or host == agent.history.first.uri.host next if agent.visited? 
l.href puts "crawling #{l.uri}" begin page = l.click next unless Mechanize::Page === page stack.push(*page.links) rescue Mechanize::ResponseCodeError end end mechanize-2.7.5/examples/proxy_req.rb0000644000004100000410000000021412772546476017734 0ustar www-datawww-datarequire 'rubygems' require 'mechanize' agent = Mechanize.new agent.set_proxy('localhost', '8000') page = agent.get(ARGV[0]) puts page.body mechanize-2.7.5/examples/mech-dump.rb0000644000004100000410000000013612772546476017566 0ustar www-datawww-datarequire 'rubygems' require 'mechanize' agent = Mechanize.new puts agent.get(ARGV[0]).inspect mechanize-2.7.5/examples/rubygems.rb0000644000004100000410000000123312772546476017543 0ustar www-datawww-data# This example logs a user in to rubyforge and prints out the body of the # page after logging the user in. require 'rubygems' require 'mechanize' require 'logger' # Create a new mechanize object mech = Mechanize.new mech.log = Logger.new $stderr mech.agent.http.debug_output = $stderr # Load the rubyforge website page = mech.get('https://rubygems.org/') page = mech.click page.link_with(:text => /Sign in/) # Click the login link form = page.forms[1] # Select the first form form["session[who]"] = ARGV[0] form["session[password]"] = ARGV[1] form["commit"] = "Sign in" # Submit the form page = form.submit form.buttons.first puts page.body # Print out the body mechanize-2.7.5/.autotest0000644000004100000410000000015112772546476015412 0ustar www-datawww-datarequire 'autotest/restart' Autotest.add_hook :initialize do |at| at.testlib = 'minitest/autorun' end mechanize-2.7.5/CHANGELOG.rdoc0000644000004100000410000012237212772546476015713 0ustar www-datawww-data= Mechanize CHANGELOG === 2.7.5 * New Features * All 4xx responses and RedirectLimitReachedError when fetching robots.txt are treated as full allow just like Googlebot does. * Enable support for mime-types > 3. * Bug fix * Don't cause infinite loop when `GET /robots.txt` redirects. 
(#457) * Fix basic authentication for a realm that contains uppercase characters. (#458, #459) * Fix encoding error when uploading a file which name is non-ASCII. (#333) === 2.7.4 * New Features * Accept array-like and hash-like values as query/parameter value. A new utility method Mechanize::Util.each_parameter is added, and Mechanize::Util.build_query_string is enhanced for this feature. * Allow passing a `Form::FileUpload` instance to `#post`. #350 by Sam Rawlins. * Capture link when scheme is unsupported. #362 by Jon Rowe. * Pre-defined User-Agent stings are updated to those of more recent versions, and new aliases for IE 10/11 and Edge are added. * Support for mime-types 1.x is restored while keeping compatible with mime-types 2.x. * Mechanize::Page now responds to #xpath, #css, #at_xpath, #at_css, and #%. * element(s)_with methods now accept :xpath and :css options for doing xpath/css selector searching. * Pass URI information to Nokogiri where applicable. #405 @lulalala * Bug fix * Don't raise an exception if a connection has set a {read,open}_timeout and a `file://` request is made. (#397) * Fix whitespace bug in WWW-Authenticate. #451, #450, by Rasmus Bergholdt * Don't allow redirect from a non-file URL to a file URL for security reasons. (#455) === 2.7.3 * New Features * Allow net-http-persistent instance to be named. #324, John Weir. * #save and #save! return filename #340 * Updated mime-types requirement to 2.x versions. #348 by Jeff Nyman. * Bug fix * Ensure Download#save! defaults back to original filename if none is provided (#300) === 2.7.2 * Bug fix * API compatibility issues with Mechanize::CookieJar cookies has been addressed. https://github.com/sparklemotion/http-cookie/issues/2 #326 === 2.7.1 * Bug fix * Ensure images with no "src" attribute still return correct URLs. #317 * Fixes Mechanize::Parser#extract_filename where an empty string filename in the response caused unhandled exception. 
#318 === 2.7.0 * New Features * Mechanize::Agent#response_read will now raise a Mechanize::ResponseReadError instead of an EOFError and avoid losing requested content. #296. * Depend on http-cookie, add backwards compatible deprecations. #257 Akinori MUSHA. * Added `Download#save!` for overwriting existing files. #300 Sean Kim. * Bug fix * Ensure page URLs with whitespace in them are escaped #313 @pacop. * Added a workaround for a bug of URI#+ that led to failure in resolving a relative path containing double slash like "/../http://.../". #304 === 2.6.0 * New Features * Mechanize#start and Mechanize#shutdown (Thanks, Damian Janowski!) * Added Mechanize::Agent#allowed_error_codes for setting an Array of status codes which should not raise an error. #248 Laurence Rowe. * Added `File.save!` for overwriting existing files #219. * DirectorySaver::save_to now accepts an option to decode filename. #262 * element(s)_with methods now accept a :search option for doing xpath/css selector searching. #287 Philippe Bourgau * Added httponly option for Mechanize::Cookie #242 by Paolo Perego. * Added Mechanize::XmlFile as a default pluggable parser for handling XML responses. #289 * Minor enhancements * Added Mechanize::Download#save_as as an alias to #save. #246 * Fix documentation for `Mechanize::Page` element matchers. #269 * Added Mechanize::Form::Field#raw_value for fetching a fields value before it's sent through Mechanize::Util.html_unescape. #283 * Added iPad and Android user agents. #277 by sambit, #278 by seansay. * Bug fix * Mechanize#cert and Mechanize#key now return the values set by #cert= and #key=. #244, #245 (Thanks, Robert Gogolok!) * Mechanize no longer submits disabled form fields. #276 by Bogdan Gusiev, #279 by Ricardo Valeriano. * Mechanize::File#save now behaves like Mechanize::Download#save in that it will create the parent directory before saving. 
#272, #280 by Ryan Kowalick * Ensure `application/xml` is registered as an XML parser in `PluggableParser`, not just `text/xml`. #266 James Gregory * Mechanize now writes cookiestxt with a prefixed dot for wildcard domain handling. #295 by Mike Morearty. === 2.5.2 * New Features * Mechanize::CookieJar#save_as takes a keyword option "session" to say that session cookies should be saved. Based on #230 by Jim Jones. * Minor enhancements * Added Mechanize#follow_redirect= as an alias to redirect_ok=. * Bug fix * Fixed casing of the Mac Firefox user-agent alias to match Linux Firefox. In mechanize 3 the old "Mac FireFox" user-agent alias will be removed. Pull request #231 by Gavin Miller. * Mechanize now authenticates using the raw challenge, not a reconstructed one, to avoid dealing with quoting rules of RFC 2617. Fixes failures in #231 due to net-http-digest_auth 1.2.1 * Fixed Content-Disposition parameter parser to be case insensitive. #233 * Fixed redirection counting in following meta refresh. #240 === 2.5.1 * Bug fix * Mechanize no longer copies POST requests during a redirect which was introduced by #215. Pull request #229 by Godfrey Chan. === 2.5 * Minor enhancements * Added Mechanize#ignore_bad_chunking for working around servers that don't terminate chunked transfer-encoding properly. Enabling this may cause data loss. Issue #116 * Removed content-type check from Mechanize::Page allowing forced parsing of incorrect or missing content-types. Issue #221 by GarthSnyder * Bug fixes * Fixed typos in EXAMPLES and GUIDES. Pull Request #213 by Erkan Yilmaz. * Fixed handling of a quoted content-disposition size. Pull Request #220 by Jason Rust * Mechanize now ignores a missing gzip footer like browsers do. Issue #224 by afhbl * Mechanize handles saving of files with the same name better now. Pull Request #223 by Godfrey Chan, Issue #219 by Jon Hart * Mechanize now sends headers across redirects. 
Issue #215 by Chris Gahan * Mechanize now raises Mechanize::ResponseReadError when the server does not terminate chunked transfer-encoding properly. Issue #116 * Mechanize no longer raises an exception when multiple identical radiobuttons are checked. Issue #214 by Matthias Guenther * Fixed documentation for pre_connect_hooks and post_connect_hooks. Issue #226 by Robert Poor * Worked around ruby 1.8 run with -Ku and ISO-8859-1 encoded characters in URIs. Issue #228 by Stanislav O.Pogrebnyak === 2.4 * Security fix: Mechanize#auth and Mechanize#basic_auth allowed disclosure of passwords to malicious servers and have been deprecated. In prior versions of mechanize only one set of HTTP authentication credentials were allowed for all connections. If a mechanize instance connected to more than one server then a malicious server detecting mechanize could ask for HTTP Basic authentication. This would expose the username and password intended only for one server. Mechanize#auth and Mechanize#basic_auth now warn when used. To fix the warning switch to Mechanize#add_auth which requires the URI the credentials are intended for, the username and the password. Optionally an HTTP authentication realm or NTLM domain may be provided. * Minor enhancement * Improved exception messages for 401 Unauthorized responses. Mechanize now tells you if you were missing credentials, had an incorrect password, etc. === 2.3 / 2012-02-20 * Minor enhancements * Add support for the Max-Age attribute in the Set-Cookie header. * Added Mechanize::Download#body for compatibility with Mechanize::File when using Mechanize#get_file with Mechanize::Image or other Download-based pluggable parser. Issue #202 by angas * Mechanize#max_file_buffer may be set to nil to disable creation of Tempfiles. 
* Bug fixes * Applied Mechanize#max_file_buffer to the Content-Encoding handlers as well to prevent extra Tempfiles for small gzip or deflate response * Increased the default Mechanize#max_file_buffer to 100,000 bytes. This gives ~5MB of response bodies in memory with the default history setting of 50 pages (depending on GC behavior). * Ignore empty path/domain attributes. * Cookies with an empty Expires attribute value were stored as session cookies but cookies without the Expires attribute were not. Issue #78 === 2.2.1 / 2012-02-13 * Bug fixes * Add missing file to the gem, ensure that missing files won't cause failures again. Issue #201 by Alex * Fix minor grammar issue in README. Issue #200 by Shane Becker. === 2.2 / 2012-02-12 * API changes * MetaRefresh#href is not normalized to an absolute URL, but set to the original value and resolved later. It is even set to nil when the Refresh URL is unspecified or empty. * Minor enhancements * Expose ssl_version from net-http-persistent. Patch by astera. * SSL parameters and proxy may now be set at any time. Issue #194 by dsisnero. * Improved Mechanize::Page with #image_with and #images_with and Mechanize::Page::Image various img element attribute accessors, #caption, #extname, #mime_type and #fetch. Pull request #173 by kitamomonga * Added MIME type parsing for content-types in Mechanize::PluggableParser for fine-grained parser choices. Parsers will be chosen based on exact match, simplified type or media type in that order. See Mechanize::PluggableParser#[]=. * Added Mechanize#download which downloads a response body to an IO-like or filename. * Added Mechanize::DirectorySaver which saves responses in a single directory. Issue #187 by yoshie902a. * Added Mechanize::Page::Link#noreferrer? * The documentation for Mechanize::Page#search and #at now show that both XPath and CSS expressions are allowed. Issue #199 by Shane Becker. * Bug fixes * Fixed handling of a HEAD request with Accept-Encoding: gzip. 
Issue #198 by Oleg Dashevskii * Use #resolve for resolving a Location header value. fixes #197 * A Refresh value can have whitespaces around the semicolon and equal sign. * MetaRefresh#click no longer sends out Referer. * A link with an empty href is now resolved correctly where previously the query part was dropped. === 2.1.1 / 2012-02-03 * Bug fixes * Set missing idle_timeout default. Issue #196 * Meta refresh URIs are now escaped (excluding %). Issue #177 * Fix charset name extraction. Issue #180 * A Referer URI sent on request no longer includes user information or fragment part. * Tempfiles for storing response bodies are unlinked upon creation to avoid possible lack of finalization. Issue #183 * The default maximum history size is now 50 pages to avoid filling up a disk with tempfiles accidentally. Related to Issue #183 * Errors in bodies with deflate and gzip responses now result in a Mechanize::Error instead of silently being ignored and causing future errors. Issue #185 * Mechanize now raises an UnauthorizedError instead of crashing when a 403 response does not contain a www-authenticate header. Issue #181 * Mechanize gives a useful exception when attempting to click buttons across pages. Issue #186 * Added note to Mechanize#cert_store describing how to add certificates in case your system does not come with a default set. Issue #179 * Invalid content-disposition headers are now ignored. Issue #191 * Fix NTLM by recognizing the "Negotiation" challenge instead of endlessly looping. Issue #192 * Allow specification of the NTLM domain through Mechanize#auth. Issue #193 * Documented how to convert a Mechanize::ResponseReadError into a File or Page, along with a new method #force_parse. Issue #176 === 2.1 / 2011-12-20 * Deprecations * Mechanize#get no longer accepts an options hash. * Mechanize::Util::to_native_charset has been removed. * Minor enhancements * Mechanize now depends on net-http-persistent 2.3+. 
This new version brings idle timeouts to help with the dreaded "too many connection resets" issue when POSTing to a closed connection. Issue #123 * SSL connections will be verified against the system certificate store by default. * Added Mechanize#retry_change_requests to allow mechanize to retry POST and other non-idempotent requests when you know it is safe to do so. Issue #123 * Mechanize can now stream files directly to disk without loading them into memory first through Mechanize::Download, a pluggable parser for downloading files. All responses larger than Mechanize#max_file_buffer are downloaded to a Tempfile. For backwards compatibility Mechanize::File subclasses still load the response body into memory. To force all unknown content types to download to disk instead of memory set: agent.pluggable_parser.default = Mechanize::Download * Added Mechanize#content_encoding_hooks which allow handling of non-standard content encodings like "agzip". Patch #125 by kitamomonga * Added dom_class to elements and the element matcher like dom_id. Patch #156 by Dan Hansen. * Added support for the HTML5 keygen form element. See http://dev.w3.org/html5/spec/Overview.html#the-keygen-element Patch #157 by Victor Costan. * Mechanize no longer follows meta refreshes that have no "url=" in the content attribute to avoid infinite loops. To follow a meta refresh to the same page set Mechanize#follow_meta_refresh_self to true. Issue #134 by Jo Hund. * Updated 'Mac Safari' User-Agent alias to Safari 5.1.1. 'Mac Safari 4' can be used for the old 'Mac Safari' alias. * When given multiple HTTP authentication options mechanize now picks the strongest method. * Improvements to HTTP authorization: * mechanize raises Mechanize::UnathorizedError for 401 responses which is a sublcass of Mechanize::ResponseCodeError. * Added support for NTLM authentication, but this has not been tested. * Mechanize::Cookie.new accepts attributes in a hash. * Mechanize::CookieJar#<<(cookie) (alias: add!) 
is added. Issue #139 * Different mechanize instances may now have different loggers. Issue #122 * Mechanize now accepts a proxy port as a service name or number string. Issue #167 * Bug fixes * Mechanize now handles cookies just as most modern browsers do, roughly based on RFC 6265. * domain=.example.com (which is invalid) is considered identical to domain=example.com. * A cookie with domain=example.com is sent to host.sub.example.com as well as host.example.com and example.com. * A cookie with domain=TLD (no dots) is accepted and sent if the host name is TLD, and rejected otherwise. To retain compatibility and convention, host/domain names starting with "local" are exempt from this rule. * A cookie with no domain attribute is only sent to the original host. * A cookie with an Effective TLD is rejected based on the public suffix list. (cf. http://publicsuffix.org/) * "Secure" cookies are not sent via non-https connection. * Subdomain match is not performed against an IP address. * It is recommended that you clear out existing cookie jars for regeneration because previously saved cookies may not have been parsed correctly. * Mechanize takes more care to avoid saving files with certain unsafe names. You should still take care not to use mechanize to save files directly into your home directory ($HOME). Issue #163. * Mechanize#cookie_jar= works again. Issue #126 * The original Referer value persists on redirection. Issue #150 * Do not send a referer on a Refresh header based redirection. * Fixed encoding error in tests when LANG=C. Patch #142 by jinschoi. * The order of items in a form submission now match the DOM order. Patch #129 by kitamomonga * Fixed proxy example in EXAMPLE. Issue #146 by NielsKSchjoedt === 2.0.1 / 2011-06-28 Mechanize now uses minitest to avoid 1.9 vs 1.8 assertion availability in test/unit * Bug Fixes * Restored Mechanize#set_proxy. Issue #117, #118, #119 * Mechanize::CookieJar#load now lazy-loads YAML. 
Issue #118 * Mechanize#keep_alive_time no longer crashes but does nothing as net-http-persistent does not support HTTP/1.0 keep-alive extensions. === 2.0 / 2011-06-27 Mechanize is now under the MIT license * API changes * WWW::Mechanize has been removed. Use Mechanize. * Pre connect hooks are now called with the agent and the request. See Mechanize#pre_connect_hooks. * Post connect hooks are now called with the agent and the response. See Mechanize#post_connect_hooks. * Mechanize::Chain is gone, as an internal API this should cause no problems. * Mechanize#fetch_page no longer accepts an options Hash. * Mechanize#put now accepts headers instead of an options Hash as the last argument * Mechanize#delete now accepts headers instead of an options Hash as the last argument * Mechanize#request_with_entity now accepts headers instead of an options Hash as the last argument * Mechanize no longer raises RuntimeError directly, Mechanize::Error or ArgumentError are raised instead. * The User-Agent header has changed. It no longer includes the WWW- prefix and now includes the ruby version. The URL has been updated as well. * Mechanize now requires ruby 1.8.7 or newer. * Hpricot support has been removed as webrobots requires nokogiri. * Mechanize#get no longer accepts the referer as the second argument. * Mechanize#get no longer allows the HTTP method to be changed (:verb option). * Mechanize::Page::Meta is now Mechanize::Page::MetaRefresh to accurately depict its responsibilities. * Mechanize::Page#meta is now Mechanize::Page#meta_refresh as it only contains meta elements with http-equiv of "refresh" * Mechanize::Page#charset is now Mechanize::Page::charset. GH #112, patch by Godfrey Chan. * Deprecations * Mechanize#get with an options hash is deprecated and will be removed after October, 2011. * Mechanize::Util::to_native_charset is deprecated as it is no longer used by Mechanize. 
* New Features * Add header reference methods to Mechanize::File so that a reponse object gets compatible with Net::HTTPResponse. * Mechanize#click accepts a regexp or string to click a button/link in the current page. It works as expected when not passed a string or regexp. * Provide a way to only follow permanent redirects (301) automatically: agent.redirect_ok = :permanent GH #73 * Mechanize now supports HTML5 meta charset. GH #113 * Documented various Mechanize accessors. GH #66 * Mechanize now uses net-http-digest_auth. GH #31 * Mechanize now implements session cookies. GH #78 * Mechanize now implements deflate decoding. GH #40 * Mechanize now allows a certificate and key to be passed directly. GH #71 * Mechanize::Form::MultiSelectList now implements #option_with and #options_with. GH #42 * Add Mechanize::Page::Link#rel and #rel?(kind) to read and test the rel attribute. * Add Mechanize::Page#canonical_uri to read a tag. * Add support for Robots Exclusion Protocol (i.e. robots.txt) and nofollow/noindex in meta tags and the rel attribute. Automatic exclusion can be turned on by setting: agent.robots = true * Manual robots.txt test can be performed with Mechanize#robots_allowed? and #robots_disallowed?. * Mechanize::Form now supports the accept-charset attribute. GH #96 * Mechanize::ResponseReadError is raised if there is an exception while reading the response body. This allows recovery from broken HTTP servers (or connections). GH #90 * Mechanize#follow_meta_refresh set to :anywhere will follow meta refresh found outside of a document's head. GH #99 * Add support for HTML5's rel="noreferrer" attribute which indicates no "Referer" information should be sent when following the link. * A frame will now load its content when #content is called. GH #111 * Added Mechanize#default_encoding to provide a default for pages with no encoding specified. GH #104 * Added Mechanize#force_default_encoding which only uses Mechanize#default_encoding for parsing HTML. 
GH #104 * Bug Fixes: * Fixed a bug where Referer is not sent when accessing a relative URI starting with "http". * Fix handling of Meta Refresh with relative paths. GH #39 * Mechanize::CookieJar now supports RFC 2109 correctly. GH #85 * Fixed typo in EXAMPLES.rdoc. GH #74 * The base element is now handled correctly for images. GH #72 * Image buttons with no name attribute are now included in the form's button list. GH#56 * Improved handling of non ASCII-7bit compatible characters in links (only an issue on ruby 1.8). GH #36, GH #75 * Loading cookies.txt is faster. GH #38 * Mechanize no longer sends cookies for a.b.example to axb.example. GH #41 * Mechanize no longer sends the button name as a form field for image buttons. GH #45 * Blank cookie values are now skipped. GH #80 * Mechanize now adds a '.' to cookie domains if no '.' was sent. This is not allowed by RFC 2109 but does appear in RFC 2965. GH #86 * file URIs are now read in binary mode. GH #83 * Content-Encoding: x-gzip is now treated like gzip per RFC 2616. * Mechanize now unescapes URIs for meta refresh. GH #68 * Mechanize now has more robust HTML charset detection. GH #43 * Mechanize::Form::Textarea is now created from a textarea element. GH #94 * A meta content-type now overrides the HTTP content type. GH #114 * Mechanize::Page::Link#uri now handles both escaped and unescaped hrefs. GH #107 === 1.0.0 * New Features: * An optional verb may be passed to Mechanize#get GH #26 * The WWW constant is deprecated. Switch to the top level constant Mechanize * SelectList#option_with and options_with for finding options * Bug Fixes: * Rescue errors from bogus encodings * 7bit content-encoding support. Thanks sporkmonger! GH #2 * Fixed a bug with iconv conversion. Thanks awesomeman! GH #9 * meta redirects outside the head are not followed. GH #13 * Form submissions work with nil page encodings. GH #25 * Fixing default values with serialized cookies. 
GH #3 * Checkboxes and fields are sorted by page appearance before submitting. #11 === 0.9.3 * Bug Fixes: * Do not apply encoding if encoding equals 'none' Thanks Akinori MUSHA! * Fixed Page#encoding= when changing the value from or to nil. Made it return the assigned value while at it. (Akinori MUSHA) * Custom request headers may be supplied WWW::Mechanize#request_headers RF #24516 * HTML Parser may be set on a per instance level WWW::Mechanize#html_parser RF #24693 * Fixed string encoding in ruby 1.9. RF #2433 * Rescuing Zlib::DataErrors (Thanks Kelley Reynolds) * Fixing a problem with frozen SSL objects. RF #24950 * Do not send a referer on meta refresh. RF #24945 * Fixed a bug with double semi-colons in Content-Disposition headers * Properly handling cookies that specify a path. RF #25259 === 0.9.2 / 2009/03/05 * New Features: * Mechanize#submit and Form#submit take arbitrary headers(thanks penguincoder) * Bug Fixes: * Fixed a bug with bad cookie parsing * Form::RadioButton#click unchecks other buttons (RF #24159) * Fixed problems with Iconv (RF #24190, RF #24192, RF #24043) * POST parameters should be CGI escaped * Made Content-Type match case insensitive (Thanks Kelly Reynolds) * Non-string form parameters work === 0.9.1 2009/02/23 * New Features: * Encoding may be specified for a page: Page#encoding= * Bug Fixes: * m17n fixes. ありがとう konn! * Fixed a problem with base tags. ありがとう Keisuke * HEAD requests do not record in the history * Default encoding to ISO-8859-1 instead of ASCII * Requests with URI instances should not be polluted RF #23472 * Nonce count fixed for digest auth requests. Thanks Adrian Slapa! * Fixed a referer issue with requests using a uri. RF #23472 * WAP content types will now be parsed * Rescued poorly formatted cookies. Thanks Kelley Reynolds! === 0.9.0 * Deprecations * WWW::Mechanize::List is gone! 
* Mechanize uses Nokogiri as its HTML parser but you may switch to Hpricot by using WWW::Mechanize.html_parser = Hpricot * Bug Fixes: * Nil check on page when base tag is used #23021 === 0.8.5 * Deprecations * WWW::Mechanize::List will be deprecated in 0.9.0, and warnings have been added to help you upgrade. * Bug Fixes: * Stopped raising EOF exceptions on HEAD requests. ありがとう:HIRAKU Kuroda * Fixed exceptions when a logger is set and file:// requests are made. * Made Mechanize 1.9 compatible * Not setting the port in the host header for SSL sites. * Following refresh headers. Thanks Tim Connor! * Cookie Jar handles cookie domains containing ports, like 'mydomain.com:443' (Thanks Michal Ochman!) * Fixing strange uri escaping problems [#22604] * Making content-type determination more robust. (thanks Han Holl!) * Dealing with links that are query string only. [#22402] * Nokogiri may be dropped in as a replacement. WWW::Mechanize.html_parser = Nokogiri::HTML * Making sure the correct page is added to the history on meta refresh. [#22708] * Mechanize#get requests no longer send a referer unless they are relative requests. === 0.8.4 * Bug Fixes: * Setting the port number on the host header. * Fixing Authorization headers for picky servers === 0.8.3 * Bug Fixes: * Making sure logger is set during SSL connections. === 0.8.2 * Bug Fixes: * Doh! I was accidentally setting headers twice. === 0.8.1 * Bug Fixes: * Fixed problem with nil pointer when logger is set === 0.8.0 * New Features: * Lifecycle hooks. Mechanize#pre_connect_hooks, Mechanize#post_connect_hooks * file:/// urls are now supported * Added Mechanize::Page#link_with, frame_with for searching for links using +criteria+. * Implementing PUT, DELETE, and HEAD requests * Bug Fixes: * Fixed an infinite loop when content-length and body length don't match.
* Only setting headers once * Adding IIS authentication support === 0.7.8 * Bug Fixes: * Fixed bug when receiving a 304 response (HTTPNotModified) on a page not cached in history. * #21428 Default to HTML parser for 'application/xhtml+xml' content-type. * Fixed an issue where redirects were resending posted data === 0.7.7 * New Features: * Page#form_with takes a +criteria+ hash. * Page#form is changed to Page#form_with * Mechanize#get takes custom http headers. Thanks Mike Dalessio! * Form#click_button submits a form defaulting to the current button. * Form#set_fields now takes a hash. Thanks Tobi! * Mechanize#redirection_limit= for setting the max number of redirects. * Bug Fixes: * Added more examples. Thanks Robert Jackson. * #20480 Making sure the Host header is set. * #20672 Making sure cookies with weird semicolons work. * Fixed bug with percent signs in urls. http://d.hatena.ne.jp/kitamomonga/20080410/ruby_mechanize_percent_url_bug * #21132 Not checking for EOF errors on redirect * Fixed a weird gzipping error. * #21233 Smarter multipart boundary. Thanks Todd Willey! * #20097 Supporting meta tag cookies. === 0.7.6 * New Features: * Added support for reading Mozilla cookie jars. Thanks Chris Riddoch! * Moving text, password, hidden, int to default. Thanks Tim Harper! * Mechanize#history_added callback for page loads. Thanks Tobi Reif! * Mechanize#scheme_handlers callbacks for handling unsupported schemes on links. * Bug Fixes: * Ignoring scheme case http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=470642 * Not encoding tildes in uris. Thanks Bruno. [#19380] * Resetting request bodies when retrying form posts. Thanks Bruno. [#19379] * Throwing away keep alive connections on EPIPE and ECONNRESET. * Duplicating request headers when retrying a 401. Thanks Hiroshi Ichikawa. * Simulating an EOF error when a response length is bad. Thanks Tobias Gruetzmacher.
http://rubyforge.org/tracker/index.php?func=detail&aid=19178&group_id=1453&atid=5711 * Defaulting option tags to the inner text. http://rubyforge.org/tracker/index.php?func=detail&aid=19976&group_id=1453&atid=5709 * Supporting blank strings for option values. http://rubyforge.org/tracker/index.php?func=detail&aid=19975&group_id=1453&atid=5709 === 0.7.5 * Fixed a bug when fetching files and not pages. Thanks Mat Schaffer! === 0.7.4 * doh! === 0.7.3 * Pages are now yielded to a blocks given to WWW::Mechanize#get * WWW::Mechanize#get now takes hash arguments for uri parameters. * WWW::Mechanize#post takes an IO object as a parameter and posts correctly. * Fixing a strange zlib inflate problem on windows === 0.7.2 * Handling gzipped responses with no Content-Length header === 0.7.1 * Added iPhone to the user agent aliases. [#17572] * Fixed a bug with EOF errors in net/http. [#17570] * Handling 0 length gzipped responses. [#17471] === 0.7.0 * Removed Ruby 1.8.2 support * Changed parser to lazily parse links * Lazily parsing document * Adding verify_callback for SSL requests. Thanks Mike Dalessio! * Fixed a bug with Accept-Language header. Thanks Bill Siggelkow. === 0.6.11 * Detecting single quotes in meta redirects. * Adding pretty inspect for ruby versions > 1.8.4 (Thanks Joel Kociolek) http://rubyforge.org/tracker/index.php?func=detail&aid=13150&group_id=1453&atid=5709 * Fixed bug with file name in multipart posts http://rubyforge.org/tracker/?func=detail&aid=15594&group_id=1453&atid=5709 * Posting forms relative to the originating page. Thanks Mortee. * Added a FAQ http://rubyforge.org/tracker/?func=detail&aid=15772&group_id=1453&atid=5709 === 0.6.10 * Made digest authentication work with POSTs. * Made sure page was HTML before following meta refreshes. http://rubyforge.org/tracker/index.php?func=detail&aid=12260&group_id=1453&atid=5709 * Made sure that URLS with a host and no path would default to '/' for history purposes. 
http://rubyforge.org/tracker/index.php?func=detail&aid=12368&group_id=1453&atid=5709 * Avoiding memory leaks with transact. Thanks Tobias Gruetzmacher! http://rubyforge.org/tracker/index.php?func=detail&aid=12057&group_id=1453&atid=5711 * Fixing a problem with # signs in the file name. Thanks Tobias Gruetzmacher! http://rubyforge.org/tracker/index.php?func=detail&aid=12510&group_id=1453&atid=5711 * Made sure that blank form values are submitted. http://rubyforge.org/tracker/index.php?func=detail&aid=12505&group_id=1453&atid=5709 * Mechanize now respects the base tag. Thanks Stephan Dale. http://rubyforge.org/tracker/index.php?func=detail&aid=12468&group_id=1453&atid=5709 * Aliasing inspect to pretty_inspect. Thanks Eric Promislow. http://rubyforge.org/pipermail/mechanize-users/2007-July/000157.html === 0.6.9 * Updating UTF-8 support for urls * Adding AREA tags to the links list. http://rubyforge.org/pipermail/mechanize-users/2007-May/000140.html * WWW::Mechanize#follow_meta_refresh will allow you to automatically follow meta refresh tags. [#10032] * Adding x-gzip to accepted content-encoding. Thanks Simon Strandgaard http://rubyforge.org/tracker/index.php?func=detail&aid=11167&group_id=1453&atid=5711 * Added Digest Authentication support. Thanks to Ryan Davis and Eric Hodel, you get a gold star! === 0.6.8 * Keep alive can be shut off now with WWW::Mechanize#keep_alive * Conditional requests can be shut off with WWW::Mechanize#conditional_requests * Monkey patched Net::HTTP#keep_alive? * [#9877] Moved last request time. Thanks Max Stepanov * Added WWW::Mechanize::File#save * Defaulting file name to URI or Content-Disposition * Updating compatability with hpricot * Added more unit tests === 0.6.7 * Fixed a bug with keep-alive requests * [#9549] fixed problem with cookie paths === 0.6.6 * Removing hpricot overrides * Fixed a bug where alt text can be nil. Thanks Yannick! 
* Unparseable expiration dates in cookies are now treated as session cookies * Caching connections * Requests now default to keep alive * [#9434] Fixed bug where html entities weren't decoded * [#9150] Updated mechanize history to deal with redirects === 0.6.5 * Copying headers to a hash to prevent memory leaks * Speeding up page parsing * Aliased fields to elements * Adding If-Modified-Since header * Added delete_field! to form. Thanks to Sava Chankov * Updated uri escaping to support high order characters. Thanks to Henrik Nyh. * Better handling relative URIs. Thanks to Henrik Nyh * Now handles pipes in URLs http://rubyforge.org/tracker/?func=detail&aid=7140&group_id=1453&atid=5709 * Now escaping html entities in form fields. http://rubyforge.org/tracker/?func=detail&aid=7563&group_id=1453&atid=5709 * Added MSIE 7.0 user agent string === 0.6.4 * Adding the "redirect_ok" method to Mechanize to stop mechanize from following redirects. http://rubyforge.org/tracker/index.php?func=detail&aid=6571&group_id=1453&atid=5712 * Added protected method Mechanize#set_headers so that subclasses can set custom headers. http://rubyforge.org/tracker/?func=detail&aid=7208&group_id=1453&atid=5712 * Aliased Page#referer to Page#page * Fixed a bug when clicking relative urls http://rubyforge.org/pipermail/mechanize-users/2006-November/000035.html * Fixing a bug when bad version or max age is passed to Cookie::parse http://rubyforge.org/pipermail/mechanize-users/2006-November/000033.html * Fixing a bug with response codes. [#6526] * Fixed bug [#6548]. Input type of 'button' was not being added as a button. * Fixed bug [#7139]. REXML parser calls hpricot parser by accident === 0.6.3 * Added keys and values methods to Form * Added has_value? to Form * Added a has_field? method to Form * The add_field! method on Form now creates a field for you on the form. Thanks to Mat Schaffer for the patch. 
http://rubyforge.org/pipermail/mechanize-users/2006-November/000025.html * Fixed a bug when form actions have html encoded entities in them. http://rubyforge.org/pipermail/mechanize-users/2006-October/000019.html * Fixed a bug when links or frame sources have html encoded entities in the href or src. * Fixed a bug where '#' symbols are encoded http://rubyforge.org/forum/message.php?msg_id=14747 === 0.6.2 * Added a yield to Page#form so that dealing with forms can be more DSL like. * Added the parsed page to the ResponseCodeError so that the parsed results can be accessed even in the event of an error. http://rubyforge.org/pipermail/mechanize-users/2006-September/000007.html * Updated documentation (Thanks to Paul Smith) === 0.6.1 * Added a method to Form called "submit". Now forms can be submitted by calling a method on the form. * Added a click method to links * Added an REXML pluggable parser for backwards compatibility. To use it, just do this: agent.pluggable_parser.html = WWW::Mechanize::REXMLPage * Fixed a bug with referrers by adding a page attribute to forms and links. * Fixed a bug where domain names were case sensitive. http://tenderlovemaking.com/2006/09/04/road-to-ruby-mechanize-060/#comment-53 * Fixed a bug with URI escaped links. http://rubyforge.org/pipermail/mechanize-users/2006-September/000002.html * Fixed a bug when options in select lists don't have a value. Thanks Dan Higham [#5837] Code in lib/mechanize/form_elements.rb is incorrect. * Fixed a bug with loading text in to links.
http://rubyforge.org/pipermail/mechanize-users/2006-September/000000.html === 0.6.0 * Changed main parser to use hpricot * Made WWW::Mechanize::Page class searchable like hpricot * Updated WWW::Mechanize#click to support hpricot links like this: @agent.click (page/"a").first * Clicking a Frame is now possible: @agent.click (page/"frame").first * Removed deprecated attr_finder * Removed REXML helper methods since the main parser is now hpricot * Overhauled cookie parser to use WEBrick::Cookie === 0.5.4 * Added WWW::Mechanize#transact for saving history state in a transaction. See the EXAMPLES file. Thanks Johan Kiviniemi. * Added support for gzip compressed pages * Forms can now be accessed like a hash. For example, to set the value of an input field named 'name' to "Aaron", you can do this: form['name'] = "Aaron" Or to get the value of a field named 'name', do this: puts form['name'] * File uploads will now read the file specified in FileUpload#file_name * FileUpload can use an IO object in FileUpload#file_data * Fixed a bug with saving files on windows * Fixed a bug with the filename being set in forms === 0.5.3 * Mechanize#click will now act on the first element of an array. So if an array of links is passed to WWW::Mechanize#click, the first link is clicked. That means the syntax for clicking links is shortened and still supports selecting a link. The following are equivalent: agent.click page.links.first agent.click page.links * Fixed a bug with spaces in href's and get's * Added a tick, untick, and click method to radio buttons so that radiobuttons can be "clicked" * Added a tick, untick, and click method to check boxes so that checkboxes can be "clicked" * Options on Select lists can now be "tick"ed, and "untick"ed. * Fixed a potential bug conflicting with rails. Thanks Eric Kolve * Updated log4r support for a speed increase.
Thanks Yinon Bentor * Added inspect methods and pretty printing === 0.5.2 * Fixed a bug with input names that are nil * Added a warning when using attr_finder because attr_finder will be deprecated in 0.6.0 in favor of method calls. So this syntax: @agent.links(:text => 'foo') should be changed to this: @agent.links.text('foo') * Added support for selecting multiple options in select tags that support multiple options. See WWW::Mechanize::MultiSelectList. * New select list methods have been added, select_all, select_none. * Options for select lists can now be "clicked" which toggles their selection, they can be "selected" and "unselected". See WWW::Mechanize::Option * Added a method to set multiple fields at the same time, WWW::Mechanize::Form#set_fields. Which can be used like so: form.set_fields( :foo => 'bar', :name => 'Aaron' ) === 0.5.1 * Fixed bug with file uploads * Added performance tweaks to the cookie class === 0.5.0 * Added pluggable parsers. (Thanks to Eric Kolve for the idea) * Changed namespace so all classes are under WWW::Mechanize. * Updating Forms so that fields can be used as accessors (Thanks Gregory Brown) * Added WWW::Mechanize::File as default object used for unknown content types. * Added 'save_as' method to Mechanize::File, so any page can be saved. * Adding 'save_as' and 'load' to CookieJar so that cookies can be saved between sessions. * Added WWW::Mechanize::FileSaver pluggable parser to automatically save files. * Added WWW::Mechanize::Page#title for page titles * Added OpenSSL certificate support (Thanks Mike Dalessio) * Removed support for body filters in favor of pluggable parsers. * Fixed cookie bug adding a '/' when the url is missing one (Thanks Nick Dainty) === 0.4.7 * Fixed bug with no action in forms. Thanks to Adam Wiggins * Setting a default user-agent string * Added house cleaning to the cookie jar so expired cookies don't stick around. * Added new method WWW::Form#field to find the first field with a given name. 
(thanks to Gregory Brown) * Added WWW::Mechanize#get_file for fetching non text/html files === 0.4.6 * Added support for proxies * Added a uri field to WWW::Link * Added an error class WWW::Mechanize::ContentTypeError * Added image alt text to link text * Added a visited? method to WWW::Mechanize * Added Array#value= which will set the first value to the argument. That allows syntax as such: form.fields.name('q').value = 'xyz' Before it was like this: form.fields.name('q').first.value = 'xyz' === 0.4.5 * Added support for multiple values of the same name * Updated build_query_string to take an array of arrays (Thanks Michal Janeczek) * Added WWW::Mechanize#body_filter= so that response bodies can be preprocessed * Added WWW::Page#body_filter= so that response bodies can be preprocessed * Added support for more date formats in the cookie parser * Fixed a bug with empty select lists * Fixing a problem with cookies not handling no spaces after semicolons === 0.4.4 * Fixed error in method signature, basic_authetication is now basic_auth * Fixed bug with encoding names in file uploads (Big thanks to Alex Young) * Added options to the select list === 0.4.3 * Added syntactic sugar for finding things * Fixed bug with HttpOnly option in cookies * Fixed a bug with cookie date parsing * Defaulted dropdown lists to the first element * Added unit tests === 0.4.2 * Added support for iframes * Made mechanize dependent on ruby-web rather than narf * Added unit tests * Fixed a bunch of warnings === 0.4.1 * Added support for file uploading * Added support for frames (Thanks Gabriel[mailto:leerbag@googlemail.com]) * Added more unit tests * Fixed some bugs === 0.4.0 * Added more unit tests * Added a cookie jar with better cookie support, included expiration of cookies and general cookie security. * Updated mechanize to use built in net/http if ruby version is new enough.
* Added support for meta refresh tags * Defaulted form actions to 'GET' * Fixed various bugs * Added more unit tests * Added a response code exception * Thanks to Brian Ellin (brianellin@gmail.com) for: Added support for CA files, and support for 301 response codes mechanize-2.7.5/README.rdoc0000644000004100000410000000454512772546476015362 0ustar www-datawww-data= Mechanize {}[http://travis-ci.org/sparklemotion/mechanize] * http://docs.seattlerb.org/mechanize * https://github.com/sparklemotion/mechanize == Description The Mechanize library is used for automating interaction with websites. Mechanize automatically stores and sends cookies, follows redirects, and can follow links and submit forms. Form fields can be populated and submitted. Mechanize also keeps track of the sites that you have visited as a history. == Dependencies * ruby 1.9.2 or newer * nokogiri[https://github.com/sparklemotion/nokogiri] == Support: The bug tracker is available here: * https://github.com/sparklemotion/mechanize/issues == Examples If you are just starting, check out the GUIDE[http://docs.seattlerb.org/mechanize/GUIDE_rdoc.html] or the EXAMPLES[http://docs.seattlerb.org/mechanize/EXAMPLES_rdoc.html] file. == Developers Use bundler to install dependencies: bundle install Run all tests with: rake test You can also use +autotest+ from the ZenTest gem to run tests. See also Mechanize::TestCase to read about the built-in testing infrastructure. 
== Authors Copyright (c) 2005 by Michael Neumann (mneumann@ntecs.de) Copyright (c) 2006-2011: * {Aaron Patterson}[http://tenderlovemaking.com] (aaronp@rubyforge.org) * {Mike Dalessio}[http://mike.daless.io] (mike@csa.net) Copyright (c) 2011-2015: * {Eric Hodel}[http://blog.segment7.net] (drbrain@segment7.net) * {Akinori MUSHA}[http://blog.akinori.org] (knu@idaemons.org) * {Lee Jarvis}[http://twitter.com/lee_jarvis] (ljjarvis@gmail.com) This library comes with a shameless plug for employing me (Aaron[http://tenderlovemaking.com/]) programming Ruby, my favorite language! == Acknowledgments This library was heavily influenced by its namesake in the Perl world. A big thanks goes to {Andy Lester}[http://petdance.com], the author of the original Perl module WWW::Mechanize which is available here[http://search.cpan.org/dist/WWW-Mechanize/]. Ruby Mechanize would not be around without you! Thank you to Michael Neumann for starting the Ruby version. Thanks to everyone who's helped out in various ways. Finally, thank you to the people using this library! == License This library is distributed under the MIT license. Please see the LICENSE[http://docs.seattlerb.org/mechanize/LICENSE_rdoc.html] file. 
mechanize-2.7.5/.travis.yml0000644000004100000410000000111112772546476015647 0ustar www-datawww-data--- language: ruby notifications: email: - drbrain@segment7.net - ljjarvis@gmail.com - knu@idaemons.org sudo: false # bundler is missing for jruby-head in travis-ci # https://github.com/travis-ci/travis-ci/issues/5861 before_install: gem query -i -n ^bundler$ >/dev/null || gem install bundler rvm: - 1.9.3 - 2.0.0 - 2.1 - 2.2 - 2.3.1 - ruby-head - jruby-1.7.25 - jruby-9.1.2.0 - jruby-head - rbx-19mode script: rake test matrix: allow_failures: - rvm: 2.3 - rvm: ruby-head - rvm: jruby-1.7.25 - rvm: jruby-9.1.2.0 - rvm: jruby-head - rvm: rbx-19mode mechanize-2.7.5/lib/0000755000004100000410000000000012772546476014312 5ustar www-datawww-datamechanize-2.7.5/lib/mechanize/0000755000004100000410000000000012772546476016255 5ustar www-datawww-datamechanize-2.7.5/lib/mechanize/headers.rb0000644000004100000410000000061212772546476020214 0ustar www-datawww-dataclass Mechanize::Headers < Hash def [](key) super(key.downcase) end def []=(key, value) super(key.downcase, value) end def key?(key) super(key.downcase) end def canonical_each block_given? or return enum_for(__method__) each { |key, value| key = key.capitalize key.gsub!(/-([a-z])/) { "-#{$1.upcase}" } yield [key, value] } end end mechanize-2.7.5/lib/mechanize/page/0000755000004100000410000000000012772546476017171 5ustar www-datawww-datamechanize-2.7.5/lib/mechanize/page/meta_refresh.rb0000644000004100000410000000401612772546476022163 0ustar www-datawww-data## # This class encapsulates a meta element with a refresh http-equiv. Mechanize # treats meta refresh elements just like 'a' tags. MetaRefresh objects will # contain links, but most likely will have no text. class Mechanize::Page::MetaRefresh < Mechanize::Page::Link ## # Time to wait before next refresh attr_reader :delay ## # This MetaRefresh links did not contain a url= in the content attribute and # links to itself. 
attr_reader :link_self ## # Matches the content attribute of a meta refresh element. After the match: # # $1:: delay # $3:: url CONTENT_REGEXP = /^\s*(\d+\.?\d*)\s*(?:;(?:\s*url\s*=\s*(['"]?)(\S*)\2)?\s*)?$/i ## # Regexp of unsafe URI characters that excludes % for Issue #177 UNSAFE = /[^\-_.!~*'()a-zA-Z\d;\/?:@&%=+$,\[\]]/ ## # Parses the delay and url from the content attribute of a meta # refresh element. # # Returns an array of [delay, url, link_self], where the first two # are strings containing the respective parts of the refresh value, # and link_self is a boolean value that indicates whether the url # part is missing or empty. If base_uri, the URI of the current # page is given, the value of url becomes an absolute URI. def self.parse content, base_uri = nil m = CONTENT_REGEXP.match(content) or return delay, url = m[1], m[3] url &&= url.empty? ? nil : Mechanize::Util.uri_escape(url, UNSAFE) link_self = url.nil? if base_uri url = url ? base_uri + url : base_uri end return delay, url, link_self end def self.from_node node, page, uri = nil http_equiv = node['http-equiv'] and /\ARefresh\z/i =~ http_equiv or return delay, uri, link_self = parse node['content'], uri return unless delay new node, page, delay, uri, link_self end def initialize node, page, delay, href, link_self = false super node, page.mech, page @delay = delay.include?(?.) ? delay.to_f : delay.to_i @href = href @link_self = link_self end def noreferrer? true end end mechanize-2.7.5/lib/mechanize/page/link.rb0000644000004100000410000000463512772546476020463 0ustar www-datawww-data## # This class encapsulates links. It contains the text and the URI for # 'a' tags parsed out of an HTML page. If the link contains an image, # the alt text will be used for that image. 
#
# For example, the text for the following links will both be 'Hello World':
#
#   Hello World
#   Hello World
#
# NOTE(review): the two example lines above look like they lost their HTML
# markup in this copy -- confirm against the upstream source.

class Mechanize::Page::Link
  attr_reader :node
  attr_reader :href
  attr_reader :attributes
  attr_reader :page
  alias :referer :page

  # Wraps +node+.  The node itself doubles as the attribute store and the
  # href is read from its 'href' attribute.  Text and URI are computed
  # lazily by #text and #uri.
  def initialize(node, mech, page)
    @node       = node
    @attributes = node
    @href       = node['href']
    @mech       = mech
    @page       = page
    @text       = nil
    @uri        = nil
  end

  # Click on this link
  def click
    @mech.click self
  end

  # This method is a shorthand to get link's DOM id.
  # Common usage:
  #   page.link_with(:dom_id => "links_exact_id")
  def dom_id
    node['id']
  end

  # This method is a shorthand to get a link's DOM class
  # Common usage:
  #   page.link_with(:dom_class => "links_exact_class")
  def dom_class
    node['class']
  end

  def pretty_print(q) # :nodoc:
    q.object_group(self) {
      q.breakable; q.pp text
      q.breakable; q.pp href
    }
  end

  alias inspect pretty_inspect # :nodoc:

  # A list of words in the rel attribute, all lower-cased.  Memoized; an
  # empty array when there is no rel attribute.
  def rel
    @rel ||= (val = attributes['rel']) ? val.downcase.split(' ') : []
  end

  # Test if the rel attribute includes +kind+.
  def rel? kind
    rel.include? kind
  end

  # Test if this link should not be traced.
  def noreferrer?
    rel?('noreferrer')
  end

  # The text content of this link.  Memoized after the first call.
  def text
    return @text if @text

    @text = @node.inner_text

    # If there is no text, try to find an image and use its alt text
    if (@text.nil? or @text.empty?) and imgs = @node.search('img') then
      @text = imgs.map do |e|
        e['alt']
      end.join
    end

    @text
  end

  alias :to_s :text

  # A URI for the #href for this link.  The link is first parsed as a raw
  # link.  If that fails parsing an escaped link is attempted.  Returns nil
  # when there is no href.
  def uri
    @uri ||= if @href then
               begin
                 URI.parse @href
               rescue URI::InvalidURIError
                 URI.parse WEBrick::HTTPUtils.escape @href
               end
             end
  end

  # A fully resolved URI for the #href for this link.
  def resolved_uri
    @mech.resolve uri
  end

end
mechanize-2.7.5/lib/mechanize/page/label.rb0000644000004100000410000000050012772546476020570 0ustar www-datawww-data
##
# A form label on an HTML page

class Mechanize::Page::Label
  attr_reader :node
  attr_reader :text
  attr_reader :page
  alias :to_s :text

  # Wraps +node+; the label text is the node's inner text.
  def initialize(node, page)
    @node = node
    @text = node.inner_text
    @page = page
  end

  # Searches the page for "#<id>" where <id> is the label's 'for'
  # attribute.  Returns nil when the label has no 'for' attribute.
  def for
    (id = @node['for']) && page.search("##{id}") || nil
  end
end
mechanize-2.7.5/lib/mechanize/page/frame.rb0000644000004100000410000000123112772546476020605 0ustar www-datawww-data
# A Frame object wraps a frame HTML element.  Frame objects can be treated
# just like Link objects.  They contain #src, the #link they refer to and a
# #name, the name of the frame they refer to.  #src and #name are aliased to
# #href and #text respectively so that a Frame object can be treated just like
# a Link.

class Mechanize::Page::Frame < Mechanize::Page::Link

  alias :src :href

  attr_reader :text
  alias :name :text

  # Same as Link#initialize, except that text comes from the 'name'
  # attribute and href from the 'src' attribute of +node+.
  def initialize(node, mech, referer)
    super(node, mech, referer)
    @node = node
    @text = node['name']
    @href = node['src']
    @content = nil
  end

  # Fetches the frame's href through the agent, passing the owning page as
  # the third argument.  The result is memoized.
  def content
    @content ||= @mech.get @href, [], page
  end

end
mechanize-2.7.5/lib/mechanize/page/base.rb0000644000004100000410000000032612772546476020431 0ustar www-datawww-data
##
# A base element on an HTML page.  Mechanize treats base tags just like 'a'
# tags.  Base objects will contain links, but most likely will have no text.

class Mechanize::Page::Base < Mechanize::Page::Link
end
mechanize-2.7.5/lib/mechanize/page/image.rb0000644000004100000410000000640512772546476020605 0ustar www-datawww-data
##
# An image element on an HTML page

class Mechanize::Page::Image

  attr_reader :node
  attr_accessor :page
  attr_accessor :mech

  ##
  # Creates a new Mechanize::Page::Image from an image +node+ and source
  # +page+.

  def initialize node, page
    @node = node
    @page = page
    @mech = page.mech
  end

  ##
  # The alt attribute of the image

  def alt
    node['alt']
  end

  ##
  # The caption of the image.
In order of preference, the #title, #alt, or # empty string "". def caption title || alt || '' end alias :text :caption ## # The class attribute of the image def dom_class node['class'] end ## # The id attribute of the image def dom_id node['id'] end ## # The suffix of the #url. The dot is a part of suffix, not a delimiter. # # p image.url # => "http://example/test.jpg" # p image.extname # => ".jpg" # # Returns an empty string if #url has no suffix: # # p image.url # => "http://example/sampleimage" # p image.extname # => "" def extname return nil unless src File.extname url.path end ## # Downloads the image. # # agent.page.image_with(:src => /logo/).fetch.save # # The referer is: # # #page("parent") :: # all images on http html, relative #src images on https html # (no referer) :: # absolute #src images on https html # user specified :: # img.fetch(nil, my_referer_uri_or_page) def fetch parameters = [], referer = nil, headers = {} mech.get src, parameters, referer || image_referer, headers end ## # The height attribute of the image def height node['height'] end def image_referer # :nodoc: http_page = page.uri && page.uri.scheme == 'http' https_page = page.uri && page.uri.scheme == 'https' case when http_page then page when https_page && relative? then page else Mechanize::File.new(nil, { 'content-type' => 'text/plain' }, '', 200) end end ## # MIME type guessed from the image url suffix # # p image.extname # => ".jpg" # p image.mime_type # => "image/jpeg" # page.images_with(:mime_type => /gif|jpeg|png/).each do ... # # Returns nil if url has no (well-known) suffix: # # p image.url # => "http://example/sampleimage" # p image.mime_type # => nil def mime_type suffix_without_dot = extname ? extname.sub(/\A\./){''}.downcase : nil Mechanize::Util::DefaultMimeTypes[suffix_without_dot] end def pretty_print(q) # :nodoc: q.object_group(self) { q.breakable; q.pp url q.breakable; q.pp caption } end alias inspect pretty_inspect # :nodoc: def relative? 
# :nodoc: %r{^https?://} !~ src end ## # The src attribute of the image def src node['src'] end ## # The title attribute of the image def title node['title'] end ## # The URL string of this image def to_s url.to_s end ## # URI for this image def url if relative? then if page.bases[0] then page.bases[0].href + src.to_s else page.uri + Mechanize::Util.uri_escape(src.to_s) end else URI Mechanize::Util.uri_escape(src) end end alias uri url ## # The width attribute of the image def width node['width'] end end mechanize-2.7.5/lib/mechanize/history.rb0000644000004100000410000000234612772546476020310 0ustar www-datawww-data## # This class manages history for your mechanize object. class Mechanize::History < Array attr_accessor :max_size def initialize(max_size = nil) @max_size = max_size @history_index = {} end def initialize_copy(orig) super @history_index = orig.instance_variable_get(:@history_index).dup end def inspect # :nodoc: uris = map(&:uri).join ', ' "[#{uris}]" end def push(page, uri = nil) super page index = uri ? uri : page.uri @history_index[index.to_s] = page shift while length > @max_size if @max_size self end alias :<< :push def visited? uri page = @history_index[uri.to_s] return page if page # HACK uri = uri.dup uri.path = '/' if uri.path.empty? @history_index[uri.to_s] end alias visited_page visited? def clear @history_index.clear super end def shift return nil if length == 0 page = self[0] self[0] = nil super remove_from_index(page) page end def pop return nil if length == 0 page = super remove_from_index(page) page end private def remove_from_index(page) @history_index.each do |k,v| @history_index.delete(k) if v == page end end end mechanize-2.7.5/lib/mechanize/directory_saver.rb0000644000004100000410000000377512772546476022022 0ustar www-datawww-data## # Unlike Mechanize::FileSaver, the directory saver places all downloaded files # in a single pre-specified directory. 
#
# You must register the directory to save to before using the directory saver:
#
#   agent.pluggable_parser['image'] = \
#     Mechanize::DirectorySaver.save_to 'images'

class Mechanize::DirectorySaver < Mechanize::Download

  # Class-level instance variables; ::save_to sets them on each subclass it
  # creates.
  @directory = nil
  @options = {}

  ##
  # Creates a DirectorySaver subclass that will save responses to the given
  # +directory+. If +options+ includes a +decode_filename+ value set to +true+
  # then the downloaded filename will be run through +CGI.unescape+ before
  # being saved. If +options+ includes a +overwrite+ value set to +true+ then
  # downloaded file will be overwritten if two files with the same names exist.

  def self.save_to directory, options = {}
    directory = File.expand_path directory

    Class.new self do |klass|
      klass.instance_variable_set :@directory, directory
      klass.instance_variable_set :@options, options
    end
  end

  ##
  # The directory downloaded files will be saved to.

  def self.directory
    @directory
  end

  ##
  # True if downloaded files should have their names decoded before saving.

  def self.decode_filename?
    @options[:decode_filename]
  end

  ##
  # Checks if +overwrite+ parameter is set to true

  def self.overwrite?
    @options[:overwrite]
  end

  ##
  # Saves the +body_io+ into the directory specified for this DirectorySaver
  # by save_to.  The filename is chosen by Mechanize::Parser#extract_filename.
  #
  # Raises Mechanize::Error when the class has no directory configured.

  def initialize uri = nil, response = nil, body_io = nil, code = nil
    directory = self.class.directory

    raise Mechanize::Error,
      'no save directory specified - ' \
      'use Mechanize::DirectorySaver.save_to ' \
      'and register the resulting class' unless directory

    super

    @filename = CGI.unescape(@filename) if self.class.decode_filename?
    path = File.join directory, @filename

    if self.class.overwrite?
      save! path
    else
      save path
    end
  end

end
mechanize-2.7.5/lib/mechanize/test_case.rb0000644000004100000410000001641112772546476020557 0ustar www-datawww-data
require 'mechanize'
require 'logger'
require 'tempfile'
require 'tmpdir'
require 'webrick'
require 'zlib'

require 'rubygems'

begin
  gem 'minitest'
rescue Gem::LoadError
end

require 'minitest/autorun'

begin
  require 'minitest/pride'
rescue LoadError
end

##
# A generic test case for testing mechanize.  Using a subclass of
# Mechanize::TestCase for your tests will create an isolated mechanize
# instance that won't pollute your filesystem or other tests.
#
# Once Mechanize::TestCase is loaded no HTTP requests will be made outside
# mechanize itself.  All requests are handled via WEBrick servlets.
#
# Mechanize uses WEBrick servlets to test some functionality.  You can run
# other HTTP clients against the servlets using:
#
#   ruby -rmechanize/test_case/server -e0
#
# Which will launch a test server at http://localhost:8000

class Mechanize::TestCase < Minitest::Test

  TEST_DIR = File.expand_path '../../../test', __FILE__
  REQUESTS = []

  ##
  # Creates a clean mechanize instance +@mech+ for use in tests.

  def setup
    super

    REQUESTS.clear
    @mech = Mechanize.new
    @ssl_private_key = nil
    @ssl_certificate = nil
  end

  ##
  # Creates a fake page with URI http://fake.example and an empty, submittable
  # form.
  #
  # NOTE(review): the heredoc below is empty in this copy although the
  # description promises a form -- the markup may have been stripped;
  # confirm against the upstream source.

  def fake_page agent = @mech
    uri = URI 'http://fake.example/'
    html = <<-END
    END

    Mechanize::Page.new uri, nil, html, 200, agent
  end

  ##
  # Is the Encoding constant defined?

  def have_encoding?
    Object.const_defined? :Encoding
  end

  ##
  # Creates a Mechanize::Page with the given +body+

  def html_page body
    uri = URI 'http://example/'
    Mechanize::Page.new uri, nil, body, 200, @mech
  end

  ##
  # Creates a Mechanize::CookieJar by parsing the given +str+

  def cookie_jar str, uri = URI('http://example')
    jar = Mechanize::CookieJar.new
    jar.parse str, uri
    jar
  end

  ##
  # Runs the block inside a temporary directory

  def in_tmpdir
    Dir.mktmpdir do |dir|
      Dir.chdir dir do
        yield
      end
    end
  end

  ##
  # Creates a Nokogiri Node +element+ with the given +attributes+

  def node element, attributes = {}
    doc = Nokogiri::HTML::Document.new

    node = Nokogiri::XML::Node.new element, doc

    attributes.each do |name, value|
      node[name] = value
    end

    node
  end

  ##
  # Creates a Mechanize::Page for the given +uri+ with the given
  # +content_type+, response +body+ and HTTP status +code+

  def page uri, content_type = 'text/html', body = '', code = 200
    uri = URI uri unless URI::Generic === uri

    Mechanize::Page.new(uri, { 'content-type' => content_type }, body, code,
                        @mech)
  end

  ##
  # Requests made during this tests

  def requests
    REQUESTS
  end

  ##
  # An SSL private key.  This key is the same across all test runs

  def ssl_private_key
    @ssl_private_key ||= OpenSSL::PKey::RSA.new <<-KEY
-----BEGIN RSA PRIVATE KEY-----
MIG7AgEAAkEA8pmEfmP0Ibir91x6pbts4JmmsVZd3xvD5p347EFvBCbhBW1nv1Gs
bCBEFlSiT1q2qvxGb5IlbrfdhdgyqdTXUQIBAQIBAQIhAPumXslvf6YasXa1hni3
p80joKOug2UUgqOLD2GUSO//AiEA9ssY6AFxjHWuwo/+/rkLmkfO2s1Lz3OeUEWq
6DiHOK8CAQECAQECIQDt8bc4vS6wh9VXApNSKIpVygtxSFe/IwLeX26n77j6Qg==
-----END RSA PRIVATE KEY-----
    KEY
  end

  ##
  # An X509 certificate.
This certificate is the same across all test runs def ssl_certificate @ssl_certificate ||= OpenSSL::X509::Certificate.new <<-CERT -----BEGIN CERTIFICATE----- MIIBQjCB7aADAgECAgEAMA0GCSqGSIb3DQEBBQUAMCoxDzANBgNVBAMMBm5vYm9k eTEXMBUGCgmSJomT8ixkARkWB2V4YW1wbGUwIBcNMTExMTAzMjEwODU5WhgPOTk5 OTEyMzExMjU5NTlaMCoxDzANBgNVBAMMBm5vYm9keTEXMBUGCgmSJomT8ixkARkW B2V4YW1wbGUwWjANBgkqhkiG9w0BAQEFAANJADBGAkEA8pmEfmP0Ibir91x6pbts 4JmmsVZd3xvD5p347EFvBCbhBW1nv1GsbCBEFlSiT1q2qvxGb5IlbrfdhdgyqdTX UQIBATANBgkqhkiG9w0BAQUFAANBAAAB//////////////////////////////// //8AMCEwCQYFKw4DAhoFAAQUePiv+QrJxyjtEJNnH5pB9OTWIqA= -----END CERTIFICATE----- CERT end ## # Creates a Tempfile with +content+ that is immediately unlinked def tempfile content body_io = Tempfile.new @NAME body_io.unlink body_io.write content body_io.flush body_io.rewind body_io end end require 'mechanize/test_case/servlets' module Net # :nodoc: end class Net::HTTP # :nodoc: alias :old_do_start :do_start def do_start @started = true end PAGE_CACHE = {} alias :old_request :request def request(req, *data, &block) url = URI.parse(req.path) path = WEBrick::HTTPUtils.unescape(url.path) path = '/index.html' if path == '/' res = ::Response.new res.query_params = url.query req.query = if 'POST' != req.method && url.query then WEBrick::HTTPUtils.parse_query url.query elsif req['content-type'] =~ /www-form-urlencoded/ then WEBrick::HTTPUtils.parse_query req.body elsif req['content-type'] =~ /boundary=(.+)/ then boundary = WEBrick::HTTPUtils.dequote $1 WEBrick::HTTPUtils.parse_form_data req.body, boundary else {} end req.cookies = WEBrick::Cookie.parse(req['Cookie']) Mechanize::TestCase::REQUESTS << req if servlet_klass = MECHANIZE_TEST_CASE_SERVLETS[path] servlet = servlet_klass.new({}) servlet.send "do_#{req.method}", req, res else filename = "htdocs#{path.gsub(/[^\/\\.\w\s]/, '_')}" unless PAGE_CACHE[filename] open("#{Mechanize::TestCase::TEST_DIR}/#{filename}", 'rb') { |io| PAGE_CACHE[filename] = io.read } end res.body = 
PAGE_CACHE[filename] case filename when /\.txt$/ res['Content-Type'] = 'text/plain' when /\.jpg$/ res['Content-Type'] = 'image/jpeg' end end res['Content-Type'] ||= 'text/html' res.code ||= "200" response_klass = Net::HTTPResponse::CODE_TO_OBJ[res.code.to_s] response = response_klass.new res.http_version, res.code, res.message res.header.each do |k,v| v = v.first if v.length == 1 response[k] = v end res.cookies.each do |cookie| response.add_field 'Set-Cookie', cookie.to_s end response['Content-Type'] ||= 'text/html' response['Content-Length'] = res['Content-Length'] || res.body.length.to_s io = StringIO.new(res.body) response.instance_variable_set :@socket, io def io.read clen, dest = nil, _ = nil if dest then dest << super(clen) else super clen end end body_exist = req.response_body_permitted? && response_klass.body_permitted? response.instance_variable_set :@body_exist, body_exist yield response if block_given? response end end class Net::HTTPRequest # :nodoc: attr_accessor :query, :body, :cookies, :user def host 'example' end def port 80 end end class Response # :nodoc: include Net::HTTPHeader attr_reader :code attr_accessor :body, :query, :cookies attr_accessor :query_params, :http_version attr_accessor :header def code=(c) @code = c.to_s end alias :status :code alias :status= :code= def initialize @header = {} @body = '' @code = nil @query = nil @cookies = [] @http_version = '1.1' end def read_body yield body end def message '' end end mechanize-2.7.5/lib/mechanize/element_not_found_error.rb0000644000004100000410000000062412772546476023521 0ustar www-datawww-data## # Raised when an an element was not found on the Page class Mechanize::ElementNotFoundError < Mechanize::Error attr_reader :source attr_reader :element attr_reader :conditions def initialize source, element, conditions @source = source @element = element @conditions = conditions super "Element #{element} with conditions #{conditions} was not found" end end 
mechanize-2.7.5/lib/mechanize/robots_disallowed_error.rb0000644000004100000410000000131612772546476023533 0ustar www-datawww-data# Exception that is raised when an access to a resource is disallowed by # robots.txt or by HTML document itself. class Mechanize::RobotsDisallowedError < Mechanize::Error def initialize(url) if url.is_a?(URI) @url = url.to_s @uri = url else @url = url.to_s end end # Returns the URL (string) of the resource that caused this error. attr_reader :url # Returns the URL (URI object) of the resource that caused this # error. URI::InvalidURIError may be raised if the URL happens to # be invalid or not understood by the URI library. def uri @uri ||= URI.parse(url) end def to_s "Robots access is disallowed for URL: #{url}" end alias :inspect :to_s end mechanize-2.7.5/lib/mechanize/file_request.rb0000644000004100000410000000052612772546476021274 0ustar www-datawww-data## # A wrapper for a file URI that makes a request that works like a # Net::HTTPRequest class Mechanize::FileRequest attr_accessor :uri def initialize uri @uri = uri end def add_field *a end alias []= add_field def path @uri.path end def each_header end def response_body_permitted? true end end mechanize-2.7.5/lib/mechanize/form.rb0000644000004100000410000004307012772546476017551 0ustar www-datawww-datarequire 'mechanize/element_matcher' # This class encapsulates a form parsed out of an HTML page. Each type of # input field available in a form can be accessed through this object. # # == Examples # # Find a form and print out its fields # # form = page.forms.first # => Mechanize::Form # form.fields.each { |f| puts f.name } # # Set the input field 'name' to "Aaron" # # form['name'] = 'Aaron' # puts form['name'] class Mechanize::Form extend Mechanize::ElementMatcher attr_accessor :method, :action, :name attr_reader :fields, :buttons, :file_uploads, :radiobuttons, :checkboxes # Content-Type for form data (i.e. 
# application/x-www-form-urlencoded)
  attr_accessor :enctype

  # Character encoding of form data (i.e. UTF-8)
  attr_accessor :encoding

  # When true, character encoding errors will never be raised on form
  # submission.  Default is false
  attr_accessor :ignore_encoding_error

  alias :elements :fields

  attr_reader :form_node
  attr_reader :page

  # Builds a form from the form +node+.  Enctype, action, method and name
  # are read from the node's attributes (enctype defaults to
  # 'application/x-www-form-urlencoded', method to 'GET').  The encoding is
  # the node's accept-charset attribute, falling back to the page encoding
  # when a +page+ is given.  The form's controls are parsed immediately.
  def initialize(node, mech = nil, page = nil)
    @enctype = node['enctype'] || 'application/x-www-form-urlencoded'
    @form_node        = node
    @action           = Mechanize::Util.html_unescape(node['action'])
    @method           = (node['method'] || 'GET').upcase
    @name             = node['name']
    @clicked_buttons  = []
    @page             = page
    @mech             = mech

    @encoding = node['accept-charset'] || (page && page.encoding) || nil
    @ignore_encoding_error = false
    parse
  end

  # Returns whether or not the form contains a field with +field_name+.
  # The return value is the first matching field, or nil.
  def has_field?(field_name)
    fields.find { |f| f.name == field_name }
  end

  alias :has_key? :has_field?

  # Returns whether or not the form contains a field with +value+.
  # The return value is the first matching field, or nil.
  def has_value?(value)
    fields.find { |f| f.value == value }
  end

  # Returns all field names (keys) for this form
  def keys
    fields.map(&:name)
  end

  # Returns all field values for this form
  def values
    fields.map(&:value)
  end

  # Returns all buttons of type Submit.  The list is memoized on first use.
  def submits
    @submits ||= buttons.select { |f| f.class == Submit }
  end

  # Returns all buttons of type Reset.  The list is memoized on first use.
  def resets
    @resets ||= buttons.select { |f| f.class == Reset }
  end

  # Returns all fields of type Text.  The list is memoized on first use.
  def texts
    @texts ||= fields.select { |f| f.class == Text }
  end

  # Returns all fields of type Hidden.  The list is memoized on first use.
  def hiddens
    @hiddens ||= fields.select { |f| f.class == Hidden }
  end

  # Returns all fields of type Textarea.  The list is memoized on first use.
  def textareas
    @textareas ||= fields.select { |f| f.class == Textarea }
  end

  # Returns all fields of type Keygen.  The list is memoized on first use.
  def keygens
    @keygens ||= fields.select { |f| f.class == Keygen }
  end

  # Returns whether or not the form contains a Submit button named +button_name+
  def submit_button?(button_name)
    submits.find { |f| f.name == button_name }
  end

  # Returns whether
# or not the form contains a Reset button named +button_name+
  def reset_button?(button_name)
    resets.find { |f| f.name == button_name }
  end

  # Returns whether or not the form contains a Text field named +field_name+
  def text_field?(field_name)
    texts.find { |f| f.name == field_name }
  end

  # Returns whether or not the form contains a Hidden field named +field_name+
  def hidden_field?(field_name)
    hiddens.find { |f| f.name == field_name }
  end

  # Returns whether or not the form contains a Textarea named +field_name+
  def textarea_field?(field_name)
    textareas.find { |f| f.name == field_name }
  end

  # This method is a shortcut to get form's DOM id.
  # Common usage:
  #   page.form_with(:dom_id => "foorm")
  # Note that you can also use +:id+ to get to this method:
  #   page.form_with(:id => "foorm")
  def dom_id
    form_node['id']
  end

  # This method is a shortcut to get form's DOM class.
  # Common usage:
  #   page.form_with(:dom_class => "foorm")
  # Note that you can also use +:class+ to get to this method:
  #   page.form_with(:class => "foorm")
  def dom_class
    form_node['class']
  end

  # Add a field with +field_name+ and +value+.  The new field is appended
  # to #fields.
  def add_field!(field_name, value = nil)
    fields << Field.new({'name' => field_name}, value)
  end

  ##
  # This method sets multiple fields on the form.  It takes a list of +fields+
  # which are name, value pairs.
  #
  # If there is more than one field found with the same name, this method will
  # set the first one found.  If you want to set the value of a duplicate
  # field, use a value which is a Hash with the key as the index in to the
  # form.  The index is zero based.
# # For example, to set the second field named 'foo', you could do the # following: # # form.set_fields :foo => { 1 => 'bar' } def set_fields fields = {} fields.each do |name, v| case v when Hash v.each do |index, value| self.fields_with(:name => name.to_s)[index].value = value end else value = nil index = 0 [v].flatten.each do |val| index = val.to_i if value value = val unless value end self.fields_with(:name => name.to_s)[index].value = value end end end # Fetch the value of the first input field with the name passed in. Example: # puts form['name'] def [](field_name) f = field(field_name) f && f.value end # Set the value of the first input field with the name passed in. Example: # form['name'] = 'Aaron' def []=(field_name, value) f = field(field_name) if f f.value = value else add_field!(field_name, value) end end # Treat form fields like accessors. def method_missing(meth, *args) method = meth.to_s.gsub(/=$/, '') if field(method) return field(method).value if args.empty? return field(method).value = args[0] end super end # Submit the form. Does not include the +button+ as a form parameter. # Use +click_button+ or provide button as a parameter. def submit button = nil, headers = {} @mech.submit(self, button, headers) end # Submit form using +button+. Defaults # to the first button. def click_button(button = buttons.first) submit(button) end # This method is sub-method of build_query. # It converts charset of query value of fields into expected one. def proc_query(field) return unless field.query_value field.query_value.map{|(name, val)| [from_native_charset(name), from_native_charset(val.to_s)] } end private :proc_query def from_native_charset str Mechanize::Util.from_native_charset(str, encoding, @ignore_encoding_error, @mech && @mech.log) end private :from_native_charset # This method builds an array of arrays that represent the query # parameters to be used with this form. The return value can then # be used to create a query string for this form. 
def build_query(buttons = [])
    # NOTE(review): the +buttons+ parameter is never read in this method;
    # clicked buttons come from @clicked_buttons instead.
    query = []
    @mech.log.info("form encoding: #{encoding}") if @mech && @mech.log

    save_hash_field_order

    successful_controls = []

    # Enabled fields and checked, enabled checkboxes are successful.
    (fields + checkboxes).reject do |f|
      f.node["disabled"]
    end.sort.each do |f|
      case f
      when Mechanize::Form::CheckBox
        if f.checked
          successful_controls << f
        end
      when Mechanize::Form::Field
        successful_controls << f
      end
    end

    # Group radio buttons by their (charset-converted) name.
    radio_groups = {}
    radiobuttons.each do |f|
      fname = from_native_charset(f.name)
      radio_groups[fname] ||= []
      radio_groups[fname] << f
    end

    # take one radio button from each group
    radio_groups.each_value do |g|
      checked = g.select(&:checked)

      if checked.uniq.size > 1 then
        values = checked.map(&:value).join(', ').inspect
        name = checked.first.name.inspect
        raise Mechanize::Error,
              "radiobuttons #{values} are checked in the #{name} group, " \
              "only one is allowed"
      else
        successful_controls << checked.first unless checked.empty?
      end
    end

    @clicked_buttons.each { |b|
      successful_controls << b
    }

    successful_controls.sort.each do |ctrl| # DOM order
      qval = proc_query(ctrl)
      query.push(*qval)
    end

    query
  end

  # This method adds an index to all fields that have Hash nodes. This
  # enables field sorting to maintain order.
  def save_hash_field_order
    index = 0

    fields.each do |field|
      if Hash === field.node
        field.index = index
        index += 1
      end
    end
  end

  # This method adds a button to the query.  If the form needs to be
  # submitted with multiple buttons, pass each button to this method.
  #
  # Raises ArgumentError when the button's node belongs to a different
  # document than the form's node.
  def add_button_to_query(button)
    unless button.node.document == @form_node.document then
      message =
        "#{button.inspect} does not belong to the same page as " \
        "the form #{@name.inspect} in #{@page.uri}"

      raise ArgumentError, message
    end

    @clicked_buttons << button
  end

  # This method allows the same form to be submitted second time
  # with the different submit button being clicked.
  def reset
    # In the future, should add more functionality here to reset the form values to their defaults.
    @clicked_buttons = []
  end

  CRLF = "\r\n".freeze

  # This method calculates the request data to be sent back to the server
  # for this form, depending on if this is a regular post, get, or a
  # multi-part post,
  #
  # For a multipart form a fresh random boundary is generated and written
  # back into @enctype.
  def request_data
    query_params = build_query()

    case @enctype.downcase
    when /^multipart\/form-data/
      boundary = rand_string(20)
      @enctype = "multipart/form-data; boundary=#{boundary}"

      delimiter = "--#{boundary}\r\n"

      data = ::String.new

      query_params.each do |k,v|
        if k
          data << delimiter
          param_to_multipart(k, v, data)
        end
      end

      @file_uploads.each do |f|
        data << delimiter
        file_to_multipart(f, data)
      end

      data << "--#{boundary}--\r\n"
    else
      Mechanize::Util.build_query_string(query_params)
    end
  end

  # Removes all fields with name +field_name+.
  def delete_field!(field_name)
    @fields.delete_if{ |f| f.name == field_name}
  end

  ##
  # :method: field_with(criteria)
  #
  # Find one field that matches +criteria+
  # Example:
  #   form.field_with(:id => "exact_field_id").value = 'hello'

  ##
  # :method: field_with!(criteria)
  #
  # Same as +field_with+ but raises an ElementNotFoundError if no field matches
  # +criteria+

  ##
  # :method: fields_with(criteria)
  #
  # Find all fields that match +criteria+
  # Example:
  #   form.fields_with(:value => /foo/).each do |field|
  #     field.value = 'hello!'
  #   end

  elements_with :field

  ##
  # :method: button_with(criteria)
  #
  # Find one button that matches +criteria+
  # Example:
  #   form.button_with(:value => /submit/).value = 'hello'

  ##
  # :method: button_with!(criteria)
  #
  # Same as +button_with+ but raises an ElementNotFoundError if no button
  # matches +criteria+

  ##
  # :method: buttons_with(criteria)
  #
  # Find all buttons that match +criteria+
  # Example:
  #   form.buttons_with(:value => /submit/).each do |button|
  #     button.value = 'hello!'
# end elements_with :button ## # :method: file_upload_with(criteria) # # Find one file upload field that matches +criteria+ # Example: # form.file_upload_with(:file_name => /picture/).value = 'foo' ## # :mehtod: file_upload_with!(criteria) # # Same as +file_upload_with+ but raises an ElementNotFoundError if no button # matches +criteria+ ## # :method: file_uploads_with(criteria) # # Find all file upload fields that match +criteria+ # Example: # form.file_uploads_with(:file_name => /picutre/).each do |field| # field.value = 'foo!' # end elements_with :file_upload ## # :method: radiobutton_with(criteria) # # Find one radio button that matches +criteria+ # Example: # form.radiobutton_with(:name => /woo/).check ## # :mehtod: radiobutton_with!(criteria) # # Same as +radiobutton_with+ but raises an ElementNotFoundError if no button # matches +criteria+ ## # :method: radiobuttons_with(criteria) # # Find all radio buttons that match +criteria+ # Example: # form.radiobuttons_with(:name => /woo/).each do |field| # field.check # end elements_with :radiobutton ## # :method: checkbox_with(criteria) # # Find one checkbox that matches +criteria+ # Example: # form.checkbox_with(:name => /woo/).check ## # :mehtod: checkbox_with!(criteria) # # Same as +checkbox_with+ but raises an ElementNotFoundError if no button # matches +criteria+ ## # :method: checkboxes_with(criteria) # # Find all checkboxes that match +criteria+ # Example: # form.checkboxes_with(:name => /woo/).each do |field| # field.check # end elements_with :checkbox, :checkboxes def pretty_print(q) # :nodoc: q.object_group(self) { q.breakable; q.group(1, '{name', '}') { q.breakable; q.pp name } q.breakable; q.group(1, '{method', '}') { q.breakable; q.pp method } q.breakable; q.group(1, '{action', '}') { q.breakable; q.pp action } q.breakable; q.group(1, '{fields', '}') { fields.each do |field| q.breakable q.pp field end } q.breakable; q.group(1, '{radiobuttons', '}') { radiobuttons.each { |b| q.breakable; q.pp b } } 
q.breakable; q.group(1, '{checkboxes', '}') { checkboxes.each { |b| q.breakable; q.pp b } } q.breakable; q.group(1, '{file_uploads', '}') { file_uploads.each { |b| q.breakable; q.pp b } } q.breakable; q.group(1, '{buttons', '}') { buttons.each { |b| q.breakable; q.pp b } } } end alias inspect pretty_inspect # :nodoc: private def parse @fields = [] @buttons = [] @file_uploads = [] @radiobuttons = [] @checkboxes = [] # Find all input tags form_node.search('input').each do |node| type = (node['type'] || 'text').downcase name = node['name'] next if name.nil? && !%w[submit button image].include?(type) case type when 'radio' @radiobuttons << RadioButton.new(node, self) when 'checkbox' @checkboxes << CheckBox.new(node, self) when 'file' @file_uploads << FileUpload.new(node, nil) when 'submit' @buttons << Submit.new(node) when 'button' @buttons << Button.new(node) when 'reset' @buttons << Reset.new(node) when 'image' @buttons << ImageButton.new(node) when 'hidden' @fields << Hidden.new(node, node['value'] || '') when 'text' @fields << Text.new(node, node['value'] || '') when 'textarea' @fields << Textarea.new(node, node['value'] || '') else @fields << Field.new(node, node['value'] || '') end end # Find all textarea tags form_node.search('textarea').each do |node| next unless node['name'] @fields << Textarea.new(node, node.inner_text) end # Find all select tags form_node.search('select').each do |node| next unless node['name'] if node.has_attribute? 'multiple' @fields << MultiSelectList.new(node) else @fields << SelectList.new(node) end end # Find all submit button tags # FIXME: what can I do with the reset buttons? 
form_node.search('button').each do |node| type = (node['type'] || 'submit').downcase next if type == 'reset' @buttons << Button.new(node) end # Find all keygen tags form_node.search('keygen').each do |node| @fields << Keygen.new(node, node['value'] || '') end end unless ::String.method_defined?(:b) # Define String#b for Ruby < 2.0 class ::String def b dup.force_encoding(Encoding::ASCII_8BIT) end end end def rand_string(len = 10) chars = ("a".."z").to_a + ("A".."Z").to_a string = ::String.new 1.upto(len) { |i| string << chars[rand(chars.size-1)] } string end def mime_value_quote(str) str.b.gsub(/(["\r\\])/, '\\\\\1') end def param_to_multipart(name, value, buf = ::String.new) buf << "Content-Disposition: form-data; name=\"".freeze << mime_value_quote(name) << "\"\r\n\r\n".freeze << value.b << CRLF end def file_to_multipart(file, buf = ::String.new) file_name = file.file_name ? ::File.basename(file.file_name) : '' body = buf << "Content-Disposition: form-data; name=\"".freeze << mime_value_quote(file.name) << "\"; filename=\"".freeze << mime_value_quote(file_name) << "\"\r\nContent-Transfer-Encoding: binary\r\n".freeze if file.file_data.nil? and file.file_name file.file_data = File.binread(file.file_name) file.mime_type = WEBrick::HTTPUtils.mime_type(file.file_name, WEBrick::HTTPUtils::DefaultMimeTypes) end if file.mime_type body << "Content-Type: ".freeze << file.mime_type << CRLF end body << CRLF if file_data = file.file_data if file_data.respond_to? 
:read body << file_data.read.force_encoding(Encoding::ASCII_8BIT) else body << file_data.b end end body << CRLF end end require 'mechanize/form/field' require 'mechanize/form/button' require 'mechanize/form/hidden' require 'mechanize/form/text' require 'mechanize/form/textarea' require 'mechanize/form/submit' require 'mechanize/form/reset' require 'mechanize/form/file_upload' require 'mechanize/form/keygen' require 'mechanize/form/image_button' require 'mechanize/form/multi_select_list' require 'mechanize/form/option' require 'mechanize/form/radio_button' require 'mechanize/form/check_box' require 'mechanize/form/select_list' mechanize-2.7.5/lib/mechanize/parser.rb0000644000004100000410000000655112772546476020105 0ustar www-datawww-data## # The parser module provides standard methods for accessing the headers and # content of a response that are shared across pluggable parsers. module Mechanize::Parser extend Forwardable special_filenames = Regexp.union %w[ AUX COM1 COM2 COM3 COM4 COM5 COM6 COM7 COM8 COM9 CON LPT1 LPT2 LPT3 LPT4 LPT5 LPT6 LPT7 LPT8 LPT9 NUL PRN ] ## # Special filenames that must be escaped SPECIAL_FILENAMES = /\A#{special_filenames}/i ## # The URI this file was retrieved from attr_accessor :uri ## # The Mechanize::Headers for this file attr_accessor :response alias header response ## # The HTTP response code attr_accessor :code ## # :method: [] # # :call-seq: # [](header) # # Access HTTP +header+ by name def_delegator :header, :[], :[] ## # :method: []= # # :call-seq: # []=(header, value) # # Set HTTP +header+ to +value+ def_delegator :header, :[]=, :[]= ## # :method: key? # # :call-seq: # key?(header) # # Is the named +header+ present? def_delegator :header, :key?, :key? 
##
# Extracts the filename from a Content-Disposition header in the #response
# or from the URI.  If +full_path+ is true the filename will include the
# host name and path to the resource, otherwise a filename in the current
# directory is given.
#
# The result is stored in @filename and returned.  A Content-Disposition
# filename is reduced to its last path component; when that component is
# missing (for example the header value is just "/") the URI-derived name
# is used instead of failing.
#
# Note that +full_path+ reads <tt>@uri.host</tt>, so it needs @uri to be
# set.

def extract_filename full_path = @full_path
  handled = false

  if @uri then
    uri = @uri
    # A directory-style URI is saved as its index document.
    uri += 'index.html' if uri.path.end_with? '/'

    path     = uri.path.split(/\//)
    filename = path.pop || 'index.html'
  else
    path     = []
    filename = 'index.html'
  end

  # Prefer the name supplied by the server, if it is usable
  if disposition = @response['content-disposition'] then
    content_disposition =
      Mechanize::HTTP::ContentDispositionParser.parse disposition

    if content_disposition && content_disposition.filename then
      # Keep only the last component; both "/" and "\" separate directories.
      # split drops trailing empty fields, so a value made only of
      # separators (or an empty value) leaves nothing to use.
      basename = content_disposition.filename.split(/[\\\/]/).last

      if basename && basename != '' then
        filename = basename
        handled  = true
      end
    end
  end

  if not handled and @uri then
    filename << '.html' unless filename =~ /\./
    filename << "?#{@uri.query}" if @uri.query
  end

  if SPECIAL_FILENAMES =~ filename then
    filename = "_#{filename}"
  end

  # Replace control characters, spaces and characters in the set below
  filename = filename.tr "\x00-\x20<>:\"/\\|?*", '_'

  @filename = if full_path then
                File.join @uri.host, path, filename
              else
                filename
              end
end
# Fake implementation of prepend(), which does not support overriding
# inherited methods nor methods that are formerly overridden by
# another invocation of prepend().
#
# Here's what Original.prepend(Wrapper) does:
#
# - Create an anonymous stub module (hereinafter Stub) and define
#   Stub#meth that calls #meth_without_Wrapper for each instance method
#   of Wrapper that Original itself defines.
#
# - Rename Original#meth to #meth_without_Wrapper for each of those
#   methods.
#
# - Include Wrapper and Stub into Original in that order.
#
# This way, a call of Original#meth is dispatched to Wrapper#meth, which
# may call super which is dispatched to Stub#meth, which finally calls
# Original#meth_without_Wrapper which is what used to be Original#meth.
#
# Usage:
#
#     class Mechanize
#       # module with methods that overrides those of X
#       module Y
#       end
#
#       unless X.respond_to?(:prepend, true)
#         require 'mechanize/prependable'
#         X.extend(Prependable)
#       end
#
#       class X
#         prepend Y
#       end
#     end
module Mechanize::Prependable
  ##
  # Emulates Module#prepend for +mod+ on the receiver (see the file header
  # for the overall scheme).  Methods of +mod+ that the receiver does not
  # define are skipped; methods the receiver only inherits are skipped with
  # a warning.
  def prepend(mod)
    stub = Module.new

    # Identifier-safe tag for the renamed methods; anonymous modules get a
    # name built from their object_id.
    mod_id = (mod.name || 'Module__%d' % mod.object_id).gsub(/::/, '__')

    mod.instance_methods.each { |name|
      method_defined?(name) or next

      original = instance_method(name)
      if original.owner != self
        warn "%s cannot override an inherited method: %s(%s)#%s" % [
          __method__, self, original.owner, name
        ]
        next
      end

      name = name.to_s
      # Insert the tag before a trailing ?, ! or = (or at the very end), so
      # "foo?" becomes "foo_without_X?".
      name_without = name.sub(/(?=[?!=]?\z)/) { '_without_%s' % mod_id }

      # Build a parameter list matching the original's arity: positional
      # x1..xN, plus *a when the arity is negative, and always &b.
      arity = original.arity
      arglist = (
        if arity >= 0
          (1..arity).map { |i| 'x%d' % i }
        else
          (1..(-arity - 1)).map { |i| 'x%d' % i } << '*a'
        end << '&b'
      ).join(', ')

      if name.end_with?('=')
        # Setter-style names are forwarded with __send__ rather than a bare
        # call.
        stub.module_eval %{
          def #{name}(#{arglist})
            __send__(:#{name_without}, #{arglist})
          end
        }
      else
        stub.module_eval %{
          def #{name}(#{arglist})
            #{name_without}(#{arglist})
          end
        }
      end
      # Move the receiver's own implementation out of the way so lookup
      # reaches the included modules.
      module_eval {
        alias_method name_without, name
        remove_method name
      }
    }

    include(mod, stub)
  end
  private :prepend
end
##
# Generates finder methods for a collection of elements.  A class extends
# this module and calls elements_with once per collection.
module Mechanize::ElementMatcher

  ##
  # Defines, via class_eval, the following methods on the receiver, where
  # +singular+ and +plural+ are substituted into the method names (+plural+
  # defaults to +singular+ followed by "s"):
  #
  # plural_with(criteria)::
  #   Returns all elements matching +criteria+ and yields them if a block is
  #   given.  A String is treated as <tt>{:name => criteria}</tt>.  The keys
  #   +:id+ and +:class+ are matched against +dom_id+ and +dom_class+.  A
  #   non-nil +:search+, +:xpath+ or +:css+ value is used as a node selector
  #   instead of an attribute test.  Every other key is sent to the element
  #   and compared with <tt>value === result</tt>.
  # singular_with(criteria)::
  #   The first result of plural_with, or nil.
  # singular_with!(criteria)::
  #   Like singular_with but raises Mechanize::ElementNotFoundError when
  #   nothing matches.
  # select_plural(selector, method)::
  #   All elements when +selector+ is nil, otherwise those whose +node+ is
  #   among the nodes returned by calling +method+ with +selector+.
  # singular::
  #   Alias for singular_with.
  def elements_with singular, plural = "#{singular}s"
    class_eval <<-CODE
      def #{plural}_with criteria = {}
        selector = method = nil
        if String === criteria then
          criteria = {:name => criteria}
        else
          criteria = criteria.each_with_object({}) { |(k, v), h|
            case k = k.to_sym
            when :id
              h[:dom_id] = v
            when :class
              h[:dom_class] = v
            when :search, :xpath, :css
              if v
                selector = v
                method = k
              end
            else
              h[k] = v
            end
          }
        end

        f = select_#{plural}(selector, method).find_all do |thing|
          criteria.all? do |k,v|
            v === thing.send(k)
          end
        end
        yield f if block_given?
        f
      end

      def #{singular}_with criteria = {}
        f = #{plural}_with(criteria).first
        yield f if block_given?
        f
      end

      def #{singular}_with! criteria = {}
        f = #{singular}_with(criteria)
        raise Mechanize::ElementNotFoundError.new(self, :#{singular}, criteria) if f.nil?
        yield f if block_given?
        f
      end

      def select_#{plural} selector, method = :search
        if selector.nil? then
          #{plural}
        else
          nodes = __send__(method, selector)
          #{plural}.find_all do |element|
            nodes.include?(element.node)
          end
        end
      end

      alias :#{singular} :#{singular}_with
    CODE
  end

end
class Mechanize::ChunkedTerminationError < Mechanize::ResponseReadError end mechanize-2.7.5/lib/mechanize/content_type_error.rb0000644000004100000410000000056712772546476022536 0ustar www-datawww-data## # This error is raised when a pluggable parser tries to parse a content type # that it does not know how to handle. For example if Mechanize::Page were to # try to parse a PDF, a ContentTypeError would be thrown. class Mechanize::ContentTypeError < Mechanize::Error attr_reader :content_type def initialize(content_type) @content_type = content_type end end mechanize-2.7.5/lib/mechanize/file.rb0000644000004100000410000000435512772546476017530 0ustar www-datawww-data## # This is the base class for the Pluggable Parsers. If Mechanize cannot find # an appropriate class to use for the content type, this class will be used. # For example, if you download an image/jpeg, Mechanize will not know how to # parse it, so this class will be instantiated. # # This is a good class to use as the base class for building your own # pluggable parsers. # # == Example # # require 'mechanize' # # agent = Mechanize.new # agent.get('http://example.com/foo.jpg').class #=> Mechanize::File class Mechanize::File include Mechanize::Parser ## # The HTTP response body, the raw file contents attr_accessor :body ## # The filename for this file based on the content-disposition of the # response or the basename of the URL attr_accessor :filename alias content body ## # Creates a new file retrieved from the given +uri+ and +response+ object. # The +body+ is the HTTP response body and +code+ is the HTTP status. def initialize uri = nil, response = nil, body = nil, code = nil @uri = uri @body = body @code = code @full_path = false unless defined? @full_path fill_header response extract_filename yield self if block_given? end ## # Use this method to save the content of this object to +filename+. 
##
# Use this method to save the content of this object to +filename+.
# This method will overwrite any existing filename that exists with the
# same name.  When +filename+ is nil, @filename is used.  Missing parent
# directories are created.
#
# returns the filename
#
#   file.save 'index.html'
#   file.save! 'index.html' # overwrite original file
#   filename = file.save! 'index.html' # overwrite original file with filename 'index.html'

def save! filename = nil
  filename ||= @filename
  dirname = ::File.dirname filename
  FileUtils.mkdir_p dirname
  # File.open always treats its argument as a path.  Kernel#open would run a
  # subprocess for names beginning with "|", which must never happen for a
  # name that may come from a caller or a server response.
  ::File.open filename, 'wb' do |f|
    f.write body
  end
  filename
end
##
# Raised when a POST, PUT, or DELETE request results in a redirect
# see RFC 2616 10.3.2, 10.3.3 http://www.ietf.org/rfc/rfc2616.txt

class Mechanize::RedirectNotGetOrHeadError < Mechanize::Error

  attr_reader :page, :response_code, :verb, :uri

  ##
  # Records the redirecting +page+ (its uri and code are copied) and the
  # request +verb+ that triggered it.

  def initialize(page, verb)
    @page, @verb   = page, verb
    @uri           = page.uri
    @response_code = page.code
  end

  ##
  # Human-readable description; also used for #inspect.

  def to_s
    format '%s redirect received after a %s request',
           @response_code, @verb.to_s.upcase
  end

  alias inspect to_s
end
class Mechanize::Download

  include Mechanize::Parser

  ##
  # The filename for this file based on the content-disposition of the
  # response or the basename of the URL

  attr_accessor :filename

  ##
  # Accessor for the IO-like that contains the body

  attr_reader :body_io

  alias content body_io

  ##
  # Creates a new download retrieved from the given +uri+ and +response+
  # object.  The +body_io+ is an IO-like containing the HTTP response body and
  # +code+ is the HTTP status.
  #
  # Yields the new object when a block is given.

  def initialize uri = nil, response = nil, body_io = nil, code = nil
    @uri = uri
    @body_io = body_io
    @code = code

    # A subclass may have chosen a value before calling super.
    @full_path = false unless defined? @full_path

    fill_header response
    extract_filename

    yield self if block_given?
  end

  ##
  # The body of this response as a String.
  #
  # Take care, this may use lots of memory if the response body is large.

  def body
    @body_io.read.tap { @body_io.rewind }
  end

  ##
  # Saves a copy of the body_io to +filename+, choosing an unused name via
  # find_free_name so existing files are not overwritten.
  #
  # returns the filename

  def save filename = nil
    filename = find_free_name filename
    save! filename
  end

  alias save_as save

  ##
  # Use this method to save the content of body_io to +filename+.
  # This method will overwrite any existing filename that exists with the
  # same name.  When +filename+ is nil, @filename is used.  Missing parent
  # directories are created.
  #
  # returns the filename

  def save! filename = nil
    filename ||= @filename
    dirname = ::File.dirname filename
    FileUtils.mkdir_p dirname

    # File.open always treats its argument as a path.  Kernel#open would run
    # a subprocess for names beginning with "|".
    ::File.open filename, 'wb' do |io|
      # Copy in 16 KiB pieces so the whole body is never held in memory.
      until @body_io.eof? do
        io.write @body_io.read 16384
      end
    end

    filename
  end

end
def add!(cookie) __deprecated__ :add cookie.domain.nil? and raise NoMethodError, 'raised for compatibility' @store.add(cookie) self end # See HTTP::CookieJar#save. def save_as(filename, *options) __deprecated__ :save save(filename, *options) end # See HTTP::CookieJar#clear. def clear! __deprecated__ :clear clear end # See HTTP::CookieJar#store. def jar __deprecated__ :store @store.instance_variable_get(:@jar) end # See HTTP::CookieJar#load. def load_cookiestxt(io) __deprecated__ :load load(io, :cookiestxt) end # See HTTP::CookieJar#save. def dump_cookiestxt(io) __deprecated__ :save save(io, :cookiestxt) end end class CookieJar < ::HTTP::CookieJar def save(output, *options) output.respond_to?(:write) or return open(output, 'w') { |io| save(io, *options) } opthash = { :format => :yaml, :session => false, } case options.size when 0 when 1 case options = options.first when Symbol opthash[:format] = options else opthash.update(options) if options end when 2 opthash[:format], options = options opthash.update(options) if options else raise ArgumentError, 'wrong number of arguments (%d for 1-3)' % (1 + options.size) end return super(output, opthash) if opthash[:format] != :yaml session = opthash[:session] nstore = HashStore.new each { |cookie| next if !session && cookie.session? if cookie.max_age cookie = cookie.dup cookie.expires = cookie.expires # convert max_age to expires end nstore.add(cookie) } yaml = YAML.dump(nstore.instance_variable_get(:@jar)) # a gross hack yaml.gsub!(%r{^( [^ ].*: !ruby/object:)HTTP::Cookie$}) { $1 + 'Mechanize::Cookie' } yaml.gsub!(%r{^( expires: )(?:|!!null|(.+?)) *$}) { $1 + ($2 ? 
##
# Loads cookies from +input+, which is either an IO-like object or the path
# of a file to read.
#
# +options+ may be a format Symbol, an options Hash, or a format followed by
# an options Hash.  The format defaults to +:yaml+; any other format is
# handed to the superclass implementation.
#
# For YAML, the data may be an Array of cookies or a Hash nested as
# domain => path => name => cookie; each cookie is passed to #add.  Data
# that cannot be parsed or has another shape is discarded (with a warning
# when a logger is set) and +self+ is returned.  On success the value of the
# final +case+ expression (the parsed data) is returned.
#
# Raises ArgumentError when more than two options are given.
def load(input, *options)
  # A path is opened with File.open, never Kernel#open: the latter would
  # execute a command for a "path" that begins with "|".
  input.respond_to?(:write) or
    return ::File.open(input, 'r') { |io| load(io, *options) }

  opthash = {
    :format => :yaml,
    :session => false,
  }
  case options.size
  when 0
  when 1
    case options = options.first
    when Symbol
      opthash[:format] = options
    else
      if hash = Hash.try_convert(options)
        opthash.update(hash)
      end
    end
  when 2
    opthash[:format], options = options
    if hash = Hash.try_convert(options)
      opthash.update(hash)
    end
  else
    raise ArgumentError, 'wrong number of arguments (%d for 1-3)' % (1 + options.size)
  end

  return super(input, opthash) if opthash[:format] != :yaml

  begin
    # NOTE(review): YAML.load can instantiate arbitrary objects on older
    # Psych versions; only load cookie files from trusted sources.
    data = YAML.load(input)
  rescue ArgumentError
    @logger.warn "unloadable YAML cookie data discarded" if @logger
    return self
  end

  case data
  when Array
    # Forward compatibility
    data.each { |cookie|
      add(cookie)
    }
  when Hash
    data.each { |domain, paths|
      paths.each { |path, names|
        names.each { |cookie_name, cookie|
          add(cookie)
        }
      }
    }
  else
    @logger.warn "incompatible YAML cookie data discarded" if @logger
    return self
  end
end
##
# Test servlet that echoes the HTTP verb it was called with, both in the
# X-Request-Method response header and in the response body.

class VerbServlet < WEBrick::HTTPServlet::AbstractServlet
  # One handler per verb, generated with define_method instead of evaluating
  # source text.
  %w[HEAD GET POST PUT DELETE].each do |verb|
    define_method "do_#{verb}" do |req, res|
      # Hand out fresh strings so a response that mutates its header or body
      # cannot affect later requests.
      res.header['X-Request-Method'] = verb.dup
      res.body = verb.dup
    end
  end
end
##
# Test servlet that serves a robots.txt disallowing /norobots.  A request
# for /robots.txt whose Host header contains "301" is redirected to
# http://301/robots_txt instead.

class RobotsTxtServlet < WEBrick::HTTPServlet::AbstractServlet
  def do_GET(req, res)
    if /301/ === req['Host'] && req.path == '/robots.txt'
      res['Location'] = 'http://301/robots_txt'
      # Use status=, like the other servlets in this directory.
      res.status = 301
    else
      res['Content-Type'] = 'text/plain'
      res.body = <<-'EOF'
User-Agent: *
Disallow: /norobots
      EOF
    end
  end
end
WEBrick::HTTPServlet::AbstractServlet TEST_DIR = File.expand_path '../../../../test', __FILE__ def do_GET(req, res) if req['Accept-Encoding'] !~ /gzip/ then res.code = 400 res.body = 'Content-Encoding: gzip is not supported by your user-agent' return end if name = req.query['file'] then open "#{TEST_DIR}/htdocs/#{name}" do |io| string = "" zipped = StringIO.new string, 'w' Zlib::GzipWriter.wrap zipped do |gz| gz.write io.read end res.body = string end else res.body = '' end res['Content-Encoding'] = req['X-ResponseContentEncoding'] || 'gzip' res['Content-Type'] = "text/html" end end mechanize-2.7.5/lib/mechanize/test_case/header_servlet.rb0000644000004100000410000000037012772546476023550 0ustar www-datawww-dataclass HeaderServlet < WEBrick::HTTPServlet::AbstractServlet def do_GET(req, res) res.content_type = "text/plain" req.query.each do |x,y| res[x] = y end req.each do |k, v| res.body << "#{k}|#{v}\n" end end end mechanize-2.7.5/lib/mechanize/test_case/one_cookie_servlet.rb0000644000004100000410000000047512772546476024440 0ustar www-datawww-dataclass OneCookieServlet < WEBrick::HTTPServlet::AbstractServlet def do_GET(req, res) cookie = WEBrick::Cookie.new("foo", "bar") cookie.path = "/" cookie.expires = Time.now + 86400 res.cookies << cookie res['Content-Type'] = "text/html" res.body = "hello" end end mechanize-2.7.5/lib/mechanize/test_case/form_servlet.rb0000644000004100000410000000172212772546476023265 0ustar www-datawww-dataclass FormServlet < WEBrick::HTTPServlet::AbstractServlet def do_GET(req, res) res.content_type = 'text/html' query = [] req.query.each_key { |k| key = WEBrick::HTTPUtils.unescape k req.query[k].each_data { |data| value = WEBrick::HTTPUtils.unescape data query << "
  • #{key}:#{value}" } } res.body = <<-BODY GET results
      #{query.join "\n"}
    #{req.query}
    BODY end def do_POST(req, res) res.content_type = 'text/html' query = [] req.query.each_key { |k| key = WEBrick::HTTPUtils.unescape k req.query[k].each_data { |data| value = WEBrick::HTTPUtils.unescape data query << "
  • #{key}:#{value}" } } res.body = <<-BODY POST results
      #{query.join "\n"}
    #{req.body}
    BODY end end mechanize-2.7.5/lib/mechanize/test_case/quoted_value_cookie_servlet.rb0000644000004100000410000000051612772546476026350 0ustar www-datawww-dataclass QuotedValueCookieServlet < WEBrick::HTTPServlet::AbstractServlet def do_GET(req, res) cookie = WEBrick::Cookie.new("quoted", "\"value\"") cookie.path = "/" cookie.expires = Time.now + 86400 res.cookies << cookie res['Content-Type'] = "text/html" res.body = "hello" end end mechanize-2.7.5/lib/mechanize/test_case/many_cookies_servlet.rb0000644000004100000410000000211612772546476025000 0ustar www-datawww-dataclass ManyCookiesServlet < WEBrick::HTTPServlet::AbstractServlet def do_GET(req, res) name_cookie = WEBrick::Cookie.new("name", "Aaron") name_cookie.path = "/" name_cookie.expires = Time.now + 86400 res.cookies << name_cookie res.cookies << name_cookie res.cookies << name_cookie res.cookies << name_cookie expired_cookie = WEBrick::Cookie.new("expired", "doh") expired_cookie.path = "/" expired_cookie.expires = Time.now - 86400 res.cookies << expired_cookie different_path_cookie = WEBrick::Cookie.new("a_path", "some_path") different_path_cookie.path = "/some_path" different_path_cookie.expires = Time.now + 86400 res.cookies << different_path_cookie no_path_cookie = WEBrick::Cookie.new("no_path", "no_path") no_path_cookie.expires = Time.now + 86400 res.cookies << no_path_cookie no_exp_path_cookie = WEBrick::Cookie.new("no_expires", "nope") no_exp_path_cookie.path = "/" res.cookies << no_exp_path_cookie res['Content-Type'] = "text/html" res.body = "hello" end end mechanize-2.7.5/lib/mechanize/test_case/refresh_without_url.rb0000644000004100000410000000050312772546476024655 0ustar www-datawww-dataclass RefreshWithoutUrl < WEBrick::HTTPServlet::AbstractServlet @@count = 0 def do_GET(req, res) address = "#{req.host}:#{req.port}" res['Content-Type'] = "text/html" @@count += 1 if @@count > 1 res['Refresh'] = "0; url=http://#{address}/"; else res['Refresh'] = "0"; end end end 
mechanize-2.7.5/lib/mechanize/test_case/send_cookies_servlet.rb0000644000004100000410000000052512772546476024767 0ustar www-datawww-dataclass SendCookiesServlet < WEBrick::HTTPServlet::AbstractServlet def do_GET(req, res) res.content_type = 'text/html' cookies = req.cookies.map do |c| "
  • #{c.name}:#{c.value}" end.join "\n" res.body = <<-BODY Your cookies
      #{cookies}
    BODY end end mechanize-2.7.5/lib/mechanize/test_case/response_code_servlet.rb0000644000004100000410000000056212772546476025153 0ustar www-datawww-dataclass ResponseCodeServlet < WEBrick::HTTPServlet::AbstractServlet def do_GET(req, res) res['Content-Type'] = req.query['ct'] || "text/html" if req.query['code'] code = req.query['code'].to_i case code when 300, 301, 302, 303, 304, 305, 307 res['Location'] = "/index.html" end res.status = code else end end end mechanize-2.7.5/lib/mechanize/test_case/ntlm_servlet.rb0000644000004100000410000000154112772546476023273 0ustar www-datawww-dataclass NTLMServlet < WEBrick::HTTPServlet::AbstractServlet def do_GET(req, res) if req['Authorization'] =~ /^NTLM (.*)/ then authorization = $1.unpack('m*').first if authorization =~ /^NTLMSSP\000\001/ then type_2 = 'TlRMTVNTUAACAAAADAAMADAAAAABAoEAASNFZ4mr' \ 'ze8AAAAAAAAAAGIAYgA8AAAARABPAE0AQQBJAE4A' \ 'AgAMAEQATwBNAEEASQBOAAEADABTAEUAUgBWAEUA' \ 'UgAEABQAZABvAG0AYQBpAG4ALgBjAG8AbQADACIA' \ 'cwBlAHIAdgBlAHIALgBkAG8AbQBhAGkAbgAuAGMA' \ 'bwBtAAAAAAA=' res['WWW-Authenticate'] = "NTLM #{type_2}" res.status = 401 elsif authorization =~ /^NTLMSSP\000\003/ then res.body = 'ok' else res['WWW-Authenticate'] = 'NTLM' res.status = 401 end else res['WWW-Authenticate'] = 'NTLM' res.status = 401 end end end mechanize-2.7.5/lib/mechanize/test_case/infinite_refresh_servlet.rb0000644000004100000410000000062712772546476025650 0ustar www-datawww-dataclass InfiniteRefreshServlet < WEBrick::HTTPServlet::AbstractServlet def do_GET(req, res) address = "#{req.host}:#{req.port}" res['Content-Type'] = req.query['ct'] || "text/html" res.status = req.query['code'] ? req.query['code'].to_i : '302' number = req.query['q'] ? 
req.query['q'].to_i : 0 res['Refresh'] = "0;url=http://#{address}/infinite_refresh?q=#{number + 1}\r\n"; end end mechanize-2.7.5/lib/mechanize/test_case/file_upload_servlet.rb0000644000004100000410000000062312772546476024604 0ustar www-datawww-dataclass FileUploadServlet < WEBrick::HTTPServlet::AbstractServlet def do_POST req, res res.body = req.body end def do_GET req, res res.content_type = 'text/html' res.body = <<-BODY Fill in this form

    You can POST anything to this endpoint, though

    BODY end end mechanize-2.7.5/lib/mechanize/test_case/http_refresh_servlet.rb0000644000004100000410000000050012772546476025010 0ustar www-datawww-dataclass HttpRefreshServlet < WEBrick::HTTPServlet::AbstractServlet def do_GET(req, res) res['Content-Type'] = req.query['ct'] || "text/html" refresh_time = req.query['refresh_time'] || 0 refresh_url = req.query['refresh_url'] || '/' res['Refresh'] = " #{refresh_time};url=#{refresh_url}\r\n"; end end mechanize-2.7.5/lib/mechanize/test_case/content_type_servlet.rb0000644000004100000410000000032412772546476025032 0ustar www-datawww-dataclass ContentTypeServlet < WEBrick::HTTPServlet::AbstractServlet def do_GET(req, res) ct = req.query['ct'] || "text/html; charset=utf-8" res['Content-Type'] = ct res.body = "Hello World" end end mechanize-2.7.5/lib/mechanize/test_case/infinite_redirect_servlet.rb0000644000004100000410000000055712772546476026015 0ustar www-datawww-dataclass InfiniteRedirectServlet < WEBrick::HTTPServlet::AbstractServlet def do_GET(req, res) res['Content-Type'] = req.query['ct'] || "text/html" res.status = req.query['code'] ? req.query['code'].to_i : '302' number = req.query['q'] ? 
##
# Test servlet that echoes the request's Referer header (or an empty string
# when there is none) as an HTML response body, for both GET and POST.

class RefererServlet < WEBrick::HTTPServlet::AbstractServlet
  def do_GET(req, res)
    referer = req['Referer']
    res['Content-Type'] = "text/html"
    res.body = referer || ''
  end

  alias :do_POST :do_GET
end
req.query['code'].to_i : '302' res['Location'] = req['X-Location'] || '/verb' end alias :do_POST :do_GET alias :do_HEAD :do_GET alias :do_PUT :do_GET alias :do_DELETE :do_GET end mechanize-2.7.5/lib/mechanize/test_case/many_cookies_as_string_servlet.rb0000644000004100000410000000224712772546476027056 0ustar www-datawww-dataclass ManyCookiesAsStringServlet < WEBrick::HTTPServlet::AbstractServlet def do_GET(req, res) cookies = [] name_cookie = WEBrick::Cookie.new("name", "Aaron") name_cookie.path = "/" name_cookie.expires = Time.now + 86400 name_cookie.domain = 'localhost' cookies << name_cookie cookies << name_cookie cookies << name_cookie cookies << "#{name_cookie}; HttpOnly" expired_cookie = WEBrick::Cookie.new("expired", "doh") expired_cookie.path = "/" expired_cookie.expires = Time.now - 86400 cookies << expired_cookie different_path_cookie = WEBrick::Cookie.new("a_path", "some_path") different_path_cookie.path = "/some_path" different_path_cookie.expires = Time.now + 86400 cookies << different_path_cookie no_path_cookie = WEBrick::Cookie.new("no_path", "no_path") no_path_cookie.expires = Time.now + 86400 cookies << no_path_cookie no_exp_path_cookie = WEBrick::Cookie.new("no_expires", "nope") no_exp_path_cookie.path = "/" cookies << no_exp_path_cookie res['Set-Cookie'] = cookies.join(', ') res['Content-Type'] = "text/html" res.body = "hello" end end mechanize-2.7.5/lib/mechanize/test_case/digest_auth_servlet.rb0000644000004100000410000000136612772546476024626 0ustar www-datawww-datarequire 'logger' class DigestAuthServlet < WEBrick::HTTPServlet::AbstractServlet htpd = nil Tempfile.open 'digest.htpasswd' do |io| htpd = WEBrick::HTTPAuth::Htdigest.new(io.path) htpd.set_passwd('Blah', 'user', 'pass') end @@authenticator = WEBrick::HTTPAuth::DigestAuth.new({ :UserDB => htpd, :Realm => 'Blah', :Algorithm => 'MD5', :Logger => Logger.new(nil) }) def do_GET req, res def req.request_time; Time.now; end def req.request_uri; '/digest_auth'; end def req.request_method; 
'GET'; end begin @@authenticator.authenticate req, res res.body = 'You are authenticated' rescue WEBrick::HTTPStatus::Unauthorized res.status = 401 end end alias :do_POST :do_GET end mechanize-2.7.5/lib/mechanize/test_case/server.rb0000644000004100000410000000165512772546476022071 0ustar www-datawww-datarequire 'webrick' require 'mechanize/test_case/servlets' server = WEBrick::HTTPServer.new :Port => 8000 server.mount_proc '/' do |req, res| res.content_type = 'text/html' servlets = MECHANIZE_TEST_CASE_SERVLETS.map do |path, servlet| "
    #{servlet}
    #{path}" end.join "\n" res.body = <<-BODY Mechanize Test Case Servlets

    This server allows you to test various mechanize behavior against other HTTP clients. Some endpoints may require headers be set to have a reasonable function, or may respond differently to POST vs GET requests. Please see the servlet implementation and mechanize tests for further details.

    Here are the servlet endpoints available:

    #{servlets}
    BODY end MECHANIZE_TEST_CASE_SERVLETS.each do |path, servlet| server.mount path, servlet end trap 'INT' do server.shutdown end trap 'TERM' do server.shutdown end server.start mechanize-2.7.5/lib/mechanize/test_case/.document0000644000004100000410000000004012772546476022040 0ustar www-datawww-data# Don't document this directory mechanize-2.7.5/lib/mechanize/test_case/bad_chunking_servlet.rb0000644000004100000410000000037512772546476024741 0ustar www-datawww-dataclass BadChunkingServlet < WEBrick::HTTPServlet::AbstractServlet def do_GET req, res res.keep_alive = false if res.respond_to? :keep_alive= res['Transfer-Encoding'] = 'chunked' res.body = <<-BODY a\r 0123456789\r 0\r BODY end end mechanize-2.7.5/lib/mechanize/test_case/refresh_with_empty_url.rb0000644000004100000410000000051112772546476025342 0ustar www-datawww-dataclass RefreshWithEmptyUrl < WEBrick::HTTPServlet::AbstractServlet @@count = 0 def do_GET(req, res) address = "#{req.host}:#{req.port}" res.content_type = "text/html" @@count += 1 if @@count > 1 res['Refresh'] = "0; url=http://#{address}/"; else res['Refresh'] = "0; url="; end end end mechanize-2.7.5/lib/mechanize/pluggable_parsers.rb0000644000004100000410000001150612772546476022306 0ustar www-datawww-datarequire 'mechanize/file' require 'mechanize/file_saver' require 'mechanize/page' require 'mechanize/xml_file' require 'mime/types' ## # Mechanize allows different parsers for different content types. Mechanize # uses PluggableParser to determine which parser to use for any content type. # To use your own parser or to change the default parsers, register them with # this class through Mechanize#pluggable_parser. # # The default parser for unregistered content types is Mechanize::File. # # The module Mechanize::Parser provides basic functionality for any content # type, so you may use it in custom parsers you write. For small files you # wish to perform in-memory operations on, you should subclass # Mechanize::File. 
For large files you should subclass Mechanize::Download as # the content is only loaded into memory in small chunks. # # When writing your own pluggable parser, be sure to provide a method #body # that returns a String containing the response body for compatibility with # Mechanize#get_file. # # == Example # # To create your own parser, just create a class that takes four parameters in # the constructor. Here is an example of registering a parser that handles # CSV files: # # require 'csv' # # class CSVParser < Mechanize::File # attr_reader :csv # # def initialize uri = nil, response = nil, body = nil, code = nil # super uri, response, body, code # @csv = CSV.parse body # end # end # # agent = Mechanize.new # agent.pluggable_parser.csv = CSVParser # agent.get('http://example.com/test.csv') # => CSVParser # # Now any response with a content type of 'text/csv' will initialize a # CSVParser and return that object to the caller. # # To register a parser for a content type that Mechanize does not know about, # use the hash syntax: # # agent.pluggable_parser['text/something'] = SomeClass # # To set the default parser, use #default: # # agent.pluggable_parser.default = Mechanize::Download # # Now all unknown content types will be saved to disk and not loaded into # memory. 
class Mechanize::PluggableParser

  # Short names for the content types that have a convenience setter below
  # (#html=, #xhtml=, #pdf=, #csv=, #xml=).  A value may be a single MIME type
  # or, as for :xml, a list of them.
  CONTENT_TYPES = {
    :html  => 'text/html',
    :wap   => 'application/vnd.wap.xhtml+xml',
    :xhtml => 'application/xhtml+xml',
    :pdf   => 'application/pdf',
    :csv   => 'text/csv',
    :xml   => ['text/xml', 'application/xml'],
  }

  # The exception MIME::Type.new raises for an unparsable content type; its
  # name depends on the installed mime-types version.
  InvalidContentTypeError =
    if defined?(MIME::Type::InvalidContentType)
      # For mime-types >=2.1
      MIME::Type::InvalidContentType
    else
      # For mime-types <2.1
      MIME::InvalidContentType
    end

  ##
  # The parser class returned by #parser when nothing is registered for a
  # content type.  Starts out as Mechanize::File.

  attr_accessor :default

  ##
  # Creates a parser registry with the default parsers for HTML, XHTML, WAP,
  # XML and images, and Mechanize::File as the fallback.

  def initialize
    @parsers = {
      CONTENT_TYPES[:html]  => Mechanize::Page,
      CONTENT_TYPES[:xhtml] => Mechanize::Page,
      CONTENT_TYPES[:wap]   => Mechanize::Page,
      'image'               => Mechanize::Image,
      'text/xml'            => Mechanize::XmlFile,
      'application/xml'     => Mechanize::XmlFile,
    }

    @default = Mechanize::File
  end

  ##
  # Returns the parser registered for the given +content_type+
  #
  # The lookup tries, in order: the +content_type+ string exactly as given,
  # the parsed MIME type's full string, its simplified form, its simplified
  # form without an x- prefix, and finally its media type (such as 'image').
  # A nil, unregistered or unparsable +content_type+ yields #default.

  def parser content_type
    return default unless content_type

    parser = @parsers[content_type]

    return parser if parser

    mime_type = MIME::Type.new content_type

    parser = @parsers[mime_type.to_s] ||
             @parsers[mime_type.simplified] ||
             # Starting from mime-types 3.0 x-prefix is deprecated as per IANA
             (@parsers[MIME::Type.simplified(mime_type.to_s, remove_x_prefix: true)] rescue nil) ||
             @parsers[mime_type.media_type] ||
             default
  rescue InvalidContentTypeError
    default
  end

  def register_parser content_type, klass # :nodoc:
    @parsers[content_type] = klass
  end

  ##
  # Registers +klass+ as the parser for text/html and application/xhtml+xml
  # content

  def html=(klass)
    register_parser(CONTENT_TYPES[:html], klass)
    register_parser(CONTENT_TYPES[:xhtml], klass)
  end

  ##
  # Registers +klass+ as the parser for application/xhtml+xml content

  def xhtml=(klass)
    register_parser(CONTENT_TYPES[:xhtml], klass)
  end

  ##
  # Registers +klass+ as the parser for application/pdf content

  def pdf=(klass)
    register_parser(CONTENT_TYPES[:pdf], klass)
  end

  ##
  # Registers +klass+ as the parser for text/csv content

  def csv=(klass)
    register_parser(CONTENT_TYPES[:csv], klass)
  end

  ##
  # Registers +klass+ as the parser for text/xml and application/xml content

  def xml=(klass)
    CONTENT_TYPES[:xml].each do |content_type|
      register_parser content_type, klass
    end
  end

  ##
  # Retrieves the parser for +content_type+ content
  #
  # This is an exact lookup of the registered key: unlike #parser it neither
  # normalizes the MIME type nor falls back to #default.

  def [](content_type)
    @parsers[content_type]
  end

  ##
  # Sets the parser for +content_type+ content to +klass+
  #
  # The +content_type+ may either be a full MIME type, a simplified MIME type
  # ('text/x-csv' simplifies to 'text/csv') or a media type like 'image'.

  def []= content_type, klass
    register_parser content_type, klass
  end

end

# mechanize-2.7.5/lib/mechanize/file_response.rb0000644000004100000410000000253412772546476021443 0ustar www-datawww-data
##
# Fake response for dealing with file:/// requests
#
# Answers the response methods below from the local file system: a missing
# path reports 404, a directory produces a generated listing and anything else
# is read from disk.

class Mechanize::FileResponse

  ##
  # Creates a response for the local +file_path+.  Nothing is read until
  # #read_body is called.

  def initialize(file_path)
    @file_path = file_path
    @uri       = nil
  end

  ##
  # Yields the body for the path once: the generated listing for a directory,
  # otherwise the whole file read in binary mode.
  #
  # Raises Mechanize::ResponseCodeError when the path does not exist.

  def read_body
    raise Mechanize::ResponseCodeError.new(self) unless File.exist? @file_path

    if directory?
      yield dir_body
    else
      # File.open, not Kernel#open: Kernel#open treats a path that starts with
      # "|" as a command to run, and this path comes from a file:// URI.
      ::File.open @file_path, 'rb' do |io|
        yield io.read
      end
    end
  end

  ##
  # 200 when the path exists, 404 otherwise.

  def code
    File.exist?(@file_path) ? 200 : 404
  end

  ##
  # Length of the generated listing for a directory, the file size for an
  # existing file, and 0 for a missing path.

  def content_length
    return dir_body.length if directory?
    File.exist?(@file_path) ? File.stat(@file_path).size : 0
  end

  # There are no headers to iterate over.
  def each_header; end

  ##
  # Header lookup.  Only Content-Type (in any letter case) is answered:
  # 'text/html' for directories and for paths ending in .html or .xhtml.
  # Every other case returns nil.

  def [](key)
    return nil if key.casecmp('Content-Type') != 0
    return 'text/html' if directory?
    return 'text/html' if ['.html', '.xhtml'].any? { |extn| @file_path.end_with?(extn) }
    nil
  end

  # There are no headers to iterate over.
  def each
  end

  # No header has any fields; always returns an empty Array.
  def get_fields(key)
    []
  end

  # Always '0'.
  def http_version
    '0'
  end

  ##
  # 'OK' when the path exists, 'Not Found' otherwise.

  def message
    File.exist?(@file_path) ? 'OK' : 'Not Found'
  end

  ##
  # The file:// URI for the path, built on first use and then cached.

  def uri
    @uri ||= URI "file://#{@file_path}"
  end

  private

  # Builds the directory listing: one line per entry of the directory, joined
  # with newlines and returned as a binary string.
  #
  # NOTE(review): the entries and the two %w[] wrappers carry no markup even
  # though #[] labels directories text/html -- this looks like markup lost
  # from this copy of the file; confirm against the released gem.
  def dir_body
    body = %w[]
    body.concat Dir[File.join(@file_path, '*')].map { |f|
      "#{File.basename(f)}"
    }
    body << %w[]

    body.join("\n").force_encoding(Encoding::BINARY)
  end

  # True when the path is a directory.
  def directory?
    File.directory?(@file_path)
  end

end

# mechanize-2.7.5/lib/mechanize/response_code_error.rb0000644000004100000410000000137612772546476022652 0ustar www-datawww-data
# This error is raised when Mechanize encounters a response code it does not
# know how to handle.
Currently, this exception will be thrown if Mechanize # encounters response codes other than 200, 301, or 302. Any other response # code is up to the user to handle. class Mechanize::ResponseCodeError < Mechanize::Error attr_reader :response_code attr_reader :page def initialize(page, message = nil) super message @page = page @response_code = page.code.to_s end def to_s response_class = Net::HTTPResponse::CODE_TO_OBJ[@response_code] out = "#{@response_code} => #{response_class} " out << "for #{@page.uri} " if @page.respond_to? :uri # may be HTTPResponse out << "-- #{super}" end alias inspect to_s end mechanize-2.7.5/lib/mechanize/page.rb0000644000004100000410000003461412772546476017526 0ustar www-datawww-data## # This class encapsulates an HTML page. If Mechanize finds a content # type of 'text/html', this class will be instantiated and returned. # # Example: # # require 'mechanize' # # agent = Mechanize.new # agent.get('http://google.com/').class # => Mechanize::Page class Mechanize::Page < Mechanize::File extend Forwardable extend Mechanize::ElementMatcher DEFAULT_RESPONSE = { 'content-type' => 'text/html', }.freeze attr_accessor :mech ## # Possible encodings for this page based on HTTP headers and meta elements attr_reader :encodings def initialize(uri=nil, response=nil, body=nil, code=nil, mech=nil) response ||= DEFAULT_RESPONSE @meta_content_type = nil @encoding = nil @encodings = [nil] raise 'no' if mech and not Mechanize === mech @mech = mech reset @encodings << Mechanize::Util.detect_charset(body) if body @encodings.concat self.class.response_header_charset(response) if body # Force the encoding to be 8BIT so we can perform regular expressions. 
# We'll set it to the detected encoding later body.force_encoding(Encoding::ASCII_8BIT) @encodings.concat self.class.meta_charset body meta_content_type = self.class.meta_content_type body @meta_content_type = meta_content_type if meta_content_type end @encodings << mech.default_encoding if mech and mech.default_encoding super uri, response, body, code end def title @title ||= if doc = parser title = doc.search('title').inner_text title.empty? ? nil : title end end def response_header_charset self.class.response_header_charset(response) end def meta_charset self.class.meta_charset(body) end def detected_encoding Mechanize::Util.detect_charset(body) end def encoding=(encoding) reset @encoding = encoding if @parser parser_encoding = @parser.encoding if parser_encoding && encoding && parser_encoding.casecmp(encoding) != 0 # lazy reinitialize the parser with the new encoding @parser = nil end end encoding end def encoding parser.encoding rescue NoMethodError nil end # Return whether parser result has errors related to encoding or not. # false indicates just parser has no encoding errors, not encoding is vaild. def encoding_error?(parser=nil) parser = self.parser unless parser return false if parser.errors.empty? parser.errors.any? do |error| error.message =~ /(indicate\ encoding)| (Invalid\ char)| (input\ conversion\ failed)/x end end def parser return @parser if @parser return unless @body url = @uri && @uri.to_s if @encoding @parser = mech.html_parser.parse html_body, url, @encoding elsif mech.force_default_encoding @parser = mech.html_parser.parse html_body, url, @mech.default_encoding else @encodings.reverse_each do |encoding| @parser = mech.html_parser.parse html_body, url, encoding break unless encoding_error? 
@parser end end @parser end alias :root :parser def pretty_print(q) # :nodoc: q.object_group(self) { q.breakable q.group(1, '{url', '}') {q.breakable; q.pp uri } q.breakable q.group(1, '{meta_refresh', '}') { meta_refresh.each { |link| q.breakable; q.pp link } } q.breakable q.group(1, '{title', '}') { q.breakable; q.pp title } q.breakable q.group(1, '{iframes', '}') { iframes.each { |link| q.breakable; q.pp link } } q.breakable q.group(1, '{frames', '}') { frames.each { |link| q.breakable; q.pp link } } q.breakable q.group(1, '{links', '}') { links.each { |link| q.breakable; q.pp link } } q.breakable q.group(1, '{forms', '}') { forms.each { |form| q.breakable; q.pp form } } } end alias inspect pretty_inspect # :nodoc: def reset @bases = nil @forms = nil @frames = nil @iframes = nil @links = nil @labels = nil @labels_hash = nil @meta_refresh = nil @parser = nil @title = nil end # Return the canonical URI for the page if there is a link tag # with href="canonical". def canonical_uri link = at('link[@rel="canonical"][@href]') return unless link href = link['href'] URI href rescue URI::InvalidURIError URI Mechanize::Util.uri_escape href end # Get the content type def content_type @meta_content_type || response['content-type'] end ## # :method: search # # Shorthand for +parser.search+. # # See Nokogiri::XML::Node#search for details. ## # :method: css # # Shorthand for +parser.css+. # # See also Nokogiri::XML::Node#css for details. ## # :method: xpath # # Shorthand for +parser.xpath+. # # See also Nokogiri::XML::Node#xpath for details. ## # :method: at # # Shorthand for +parser.at+. # # See also Nokogiri::XML::Node#at for details. ## # :method: at_css # # Shorthand for +parser.at_css+. # # See also Nokogiri::XML::Node#at_css for details. ## # :method: at_xpath # # Shorthand for +parser.at_xpath+. # # See also Nokogiri::XML::Node#at_xpath for details. 
def_delegators :parser, :search, :css, :xpath, :at, :at_css, :at_xpath alias / search alias % at ## # :method: form_with # # :call-seq: # form_with(criteria) # form_with(criteria) { |form| ... } # # Find a single form matching +criteria+. See +forms_with+ for # details of +criteria+. # # Examples: # page.form_with(action: '/post/login.php') do |f| # ... # end ## # :method: form_with!(criteria) # # :call-seq: # form_with!(criteria) # form_with!(criteria) { |form| ... } # # Same as +form_with+ but raises an ElementNotFoundError if no button matches # +criteria+ ## # :method: forms_with # # :call-seq: # forms_with(name) # forms_with(name: name_matcher, id: id_matcher, class: class_matcher, # search: search_expression, xpath: xpath_expression, css: css_expression, # action: action_matcher, ...) # # Find all forms form matching criteria. If a string is given, it # is taken as a name attribute value. If a hash is given, forms # are narrowed by the key-value pairs as follows. # # :id, :dom_id: selects forms with a #dom_id value that matches this # value. # # :class, :dom_class: selects forms with a #dom_class value that # matches this value. # # :search: only selects forms matching this selector expression. # # :xpath: only selects forms matching this XPath expression. # # :css: only selects forms matching this CSS selector expression. # # :action, :method, etc.: narrows forms by a given attribute value # using the === operator. # # Example: # page.forms_with(css: '#content table.login_box form', method: /\APOST\z/i, ).each do |f| # ... # end elements_with :form ## # :method: link_with # # :call-seq: # link_with(criteria) # link_with(criteria) { |link| ... } # # Find a single link matching +criteria+. See +forms_with+ for # details of +criteria+, where for "form(s)" read "link(s)". # # Example: # page.link_with(href: /foo/).click ## # :method: link_with! # # :call-seq: # link_with!(criteria) # link_with!(criteria) { |link| ... 
} # # Same as +link_with+ but raises an ElementNotFoundError if no button matches # +criteria+ ## # :method: links_with # # :call-seq: # links_with(criteria) # # Find all links matching +criteria+. See +forms_with+ for details # of +criteria+, where for "form(s)" read "link(s)". # # Example: # page.links_with(href: /foo/).each do |link| # puts link.href # end elements_with :link ## # :method: base_with # # :call-seq: # base_with(criteria) # base_with(criteria) { |base| ... } # # Find a single base tag matching +criteria+. See +forms_with+ for # details of +criteria+, where for "form(s)" read "base tag(s)". # # Example: # page.base_with(href: /foo/).click ## # :method: base_with!(criteria) # # :call-seq: # base_with!(criteria) # base_with!(criteria) { |base| ... } # # Same as +base_with+ but raises an ElementNotFoundError if no button matches # +criteria+ ## # :method: bases_with # # :call-seq: bases_with(criteria) # # Find all base tags matching +criteria+. See +forms_with+ for # details of +criteria+, where for "form(s)" read "base tag(s)". # # Example: # page.bases_with(href: /foo/).each do |base| # puts base.href # end elements_with :base ## # :method: frame_with # # :call-seq: # frame_with(criteria) # frame_with(criteria) { |frame| ... } # # Find a single frame tag matching +criteria+. See +forms_with+ for # details of +criteria+, where for "form(s)" read "frame tag(s)". # # Example: # page.frame_with(src: /foo/).click ## # :method: frame_with! # # :call-seq: # frame_with!(criteria) # frame_with!(criteria) { |frame| ... } # # Same as +frame_with+ but raises an ElementNotFoundError if no button matches # +criteria+ ## # :method: frames_with # # :call-seq: frames_with(criteria) # # Find all frame tags matching +criteria+. See +forms_with+ for # details of +criteria+, where for "form(s)" read "frame tag(s)". 
# # Example: # page.frames_with(src: /foo/).each do |frame| # p frame.src # end elements_with :frame ## # :method: iframe_with # # :call-seq: # iframe_with(criteria) # iframe_with(criteria) { |iframe| ... } # # Find a single iframe tag matching +criteria+. See +forms_with+ for # details of +criteria+, where for "form(s)" read "iframe tag(s)". # # Example: # page.iframe_with(src: /foo/).click ## # :method: iframe_with! # # :call-seq: # iframe_with!(criteria) # iframe_with!(criteria) { |iframe| ... } # # Same as +iframe_with+ but raises an ElementNotFoundError if no button # matches +criteria+ ## # :method: iframes_with # # :call-seq: iframes_with(criteria) # # Find all iframe tags matching +criteria+. See +forms_with+ for # details of +criteria+, where for "form(s)" read "iframe tag(s)". # # Example: # page.iframes_with(src: /foo/).each do |iframe| # p iframe.src # end elements_with :iframe ## # :method: image_with # # :call-seq: # image_with(criteria) # image_with(criteria) { |image| ... } # # Find a single image matching +criteria+. See +forms_with+ for # details of +criteria+, where for "form(s)" read "image(s)". # # Example: # page.image_with(alt: /main/).fetch.save ## # :method: image_with! # # :call-seq: # image_with!(criteria) # image_with!(criteria) { |image| ... } # # Same as +image_with+ but raises an ElementNotFoundError if no button matches # +criteria+ ## # :method: images_with # # :call-seq: images_with(criteria) # # Find all images matching +criteria+. See +forms_with+ for # details of +criteria+, where for "form(s)" read "image(s)". 
# # Example: # page.images_with(src: /jpg\Z/).each do |img| # img.fetch.save # end elements_with :image ## # Return a list of all link and area tags def links @links ||= %w{ a area }.map do |tag| search(tag).map do |node| Link.new(node, @mech, self) end end.flatten end ## # Return a list of all form tags def forms @forms ||= search('form').map do |html_form| form = Mechanize::Form.new(html_form, @mech, self) form.action ||= @uri.to_s form end end ## # Return a list of all meta refresh elements def meta_refresh query = @mech.follow_meta_refresh == :anywhere ? 'meta' : 'head > meta' @meta_refresh ||= search(query).map do |node| MetaRefresh.from_node node, self end.compact end ## # Return a list of all base tags def bases @bases ||= search('base').map { |node| Base.new(node, @mech, self) } end ## # Return a list of all frame tags def frames @frames ||= search('frame').map { |node| Frame.new(node, @mech, self) } end ## # Return a list of all iframe tags def iframes @iframes ||= search('iframe').map { |node| Frame.new(node, @mech, self) } end ## # Return a list of all img tags def images @images ||= search('img').map { |node| Image.new(node, self) } end def image_urls @image_urls ||= images.map(&:url).uniq end ## # Return a list of all label tags def labels @labels ||= search('label').map { |node| Label.new(node, self) } end def labels_hash unless @labels_hash hash = {} labels.each do |label| hash[label.node['for']] = label if label.for end @labels_hash = hash end return @labels_hash end class << self def charset content_type charset = content_type[/;(?:\s*,)?\s*charset\s*=\s*([^()<>@,;:\\\"\/\[\]?={}\s]+)/i, 1] return nil if charset == 'none' charset end alias charset_from_content_type charset end def self.response_header_charset response charsets = [] response.each do |header, value| next unless header == 'content-type' next unless value =~ /charset/i charsets << charset(value) end charsets end ## # Retrieves all charsets from +meta+ tags in +body+ def 
self.meta_charset body # HACK use .map body.scan(//i).map do |meta| if meta =~ /charset\s*=\s*(["'])?\s*(.+)\s*\1/i then $2 elsif meta =~ /http-equiv\s*=\s*(["'])?content-type\1/i then meta =~ /content\s*=\s*(["'])?(.*?)\1/i m_charset = charset $2 if $2 m_charset if m_charset end end.compact end ## # Retrieves the last content-type set by a +meta+ tag in +body+ def self.meta_content_type body body.scan(//i).reverse.map do |meta| if meta =~ /http-equiv\s*=\s*(["'])?content-type\1/i then meta =~ /content=(["'])?(.*?)\1/i return $2 end end nil end private def html_body if @body @body.empty? ? '' : @body else '' end end end require 'mechanize/headers' require 'mechanize/page/image' require 'mechanize/page/label' require 'mechanize/page/link' require 'mechanize/page/base' require 'mechanize/page/frame' require 'mechanize/page/meta_refresh' mechanize-2.7.5/lib/mechanize/http/0000755000004100000410000000000012772546476017234 5ustar www-datawww-datamechanize-2.7.5/lib/mechanize/http/auth_store.rb0000644000004100000410000000576012772546476021746 0ustar www-datawww-data## # A credential store for HTTP authentication. # # uri = URI 'http://example' # # store = Mechanize::HTTP::AuthStore.new # store.add_auth uri, 'user1', 'pass' # store.add_auth uri, 'user2', 'pass', 'realm' # # user, pass = store.credentials_for uri, 'realm' #=> 'user2', 'pass' # user, pass = store.credentials_for uri, 'other' #=> 'user1', 'pass' # # store.remove_auth uri # removes all credentials class Mechanize::HTTP::AuthStore attr_reader :auth_accounts # :nodoc: attr_reader :default_auth # :nodoc: ## # Creates a new AuthStore def initialize @auth_accounts = Hash.new do |h, uri| h[uri] = {} end @default_auth = nil end ## # Adds credentials +user+, +pass+ for the server at +uri+. If +realm+ is # set the credentials are used only for that realm. If +realm+ is not set # the credentials become the default for any realm on that URI. # # +domain+ and +realm+ are exclusive as NTLM does not follow RFC # 2617. 
If +domain+ is given it is only used for NTLM authentication. def add_auth uri, user, pass, realm = nil, domain = nil uri = URI uri unless URI === uri raise ArgumentError, 'NTLM domain given with realm which NTLM does not use' if realm and domain uri += '/' auth_accounts[uri][realm] = [user, pass, domain] self end ## # USE OF add_default_auth IS NOT RECOMMENDED AS IT MAY EXPOSE PASSWORDS TO # THIRD PARTIES # # Adds credentials +user+, +pass+ as the default authentication credentials. # If no other credentials are available these will be returned from # credentials_for. # # If +domain+ is given it is only used for NTLM authentication. def add_default_auth user, pass, domain = nil warn <<-WARN You have supplied default authentication credentials that apply to ANY SERVER. Your username and password can be retrieved by ANY SERVER using Basic authentication. THIS EXPOSES YOUR USERNAME AND PASSWORD TO DISCLOSURE WITHOUT YOUR KNOWLEDGE. Use add_auth to set authentication credentials that will only be delivered only to a particular server you specify. WARN @default_auth = [user, pass, domain] end ## # Returns true if credentials exist for the +challenges+ from the server at # +uri+. def credentials? uri, challenges challenges.any? do |challenge| credentials_for uri, challenge.realm_name end end ## # Retrieves credentials for +realm+ on the server at +uri+. def credentials_for uri, realm uri = URI uri unless URI === uri uri += '/' uri.user = nil uri.password = nil realms = @auth_accounts[uri] realms[realm] || realms[nil] || @default_auth end ## # Removes credentials for +realm+ on the server at +uri+. If +realm+ is not # set all credentials for the server at +uri+ are removed. 
def remove_auth uri, realm = nil uri = URI uri unless URI === uri uri += '/' if realm then auth_accounts[uri].delete realm else auth_accounts.delete uri end self end end mechanize-2.7.5/lib/mechanize/http/agent.rb0000644000004100000410000010505212772546476020662 0ustar www-datawww-datarequire 'tempfile' require 'net/ntlm' require 'kconv' require 'webrobots' ## # An HTTP (and local disk access) user agent. This class is an implementation # detail and is subject to change at any time. class Mechanize::HTTP::Agent # :section: Headers # Disables If-Modified-Since conditional requests (enabled by default) attr_accessor :conditional_requests # Is gzip compression of requests enabled? attr_accessor :gzip_enabled # A hash of request headers to be used for every request attr_accessor :request_headers # The User-Agent header to send attr_reader :user_agent # :section: History # history of requests made attr_accessor :history # :section: Hooks # A list of hooks to call after retrieving a response. Hooks are called with # the agent and the response returned. attr_reader :post_connect_hooks # A list of hooks to call before making a request. Hooks are called with # the agent and the request to be performed. attr_reader :pre_connect_hooks # A list of hooks to call to handle the content-encoding of a request. attr_reader :content_encoding_hooks # :section: HTTP Authentication attr_reader :auth_store # :nodoc: attr_reader :authenticate_methods # :nodoc: attr_reader :digest_challenges # :nodoc: # :section: Redirection # Follow HTML meta refresh and HTTP Refresh. If set to +:anywhere+ meta # refresh tags outside of the head element will be followed. attr_accessor :follow_meta_refresh # Follow an HTML meta refresh that has no "url=" in the content attribute. # # Defaults to false to prevent infinite refresh loops. attr_accessor :follow_meta_refresh_self # Controls how this agent deals with redirects. 
The following values are # allowed: # # :all, true:: All 3xx redirects are followed (default) # :permanent:: Only 301 Moved Permanantly redirects are followed # false:: No redirects are followed attr_accessor :redirect_ok # Maximum number of redirects to follow attr_accessor :redirection_limit # :section: Allowed error codes # List of error codes (in String or Integer) to handle without # raising Mechanize::ResponseCodeError, defaulted to an empty array. # Note that 2xx, 3xx and 401 status codes will be handled without # checking this list. attr_accessor :allowed_error_codes # :section: Robots # When true, this agent will consult the site's robots.txt for each access. attr_reader :robots # Mutex used when fetching robots.txt attr_reader :robots_mutex # :section: SSL # OpenSSL key password attr_accessor :pass # :section: Timeouts # Set to false to disable HTTP/1.1 keep-alive requests attr_accessor :keep_alive # Length of time to wait until a connection is opened in seconds attr_accessor :open_timeout # Length of time to attempt to read data from the server attr_accessor :read_timeout # :section: # The cookies for this agent attr_accessor :cookie_jar # Responses larger than this will be written to a Tempfile instead of stored # in memory. Setting this to nil disables creation of Tempfiles. attr_accessor :max_file_buffer # :section: Utility # The context parses responses into pages attr_accessor :context attr_reader :http # :nodoc: # When set to true mechanize will ignore an EOF during chunked transfer # encoding so long as at least one byte was received. Be careful when # enabling this as it may cause data loss. attr_accessor :ignore_bad_chunking # Handlers for various URI schemes attr_accessor :scheme_handlers # :section: # Creates a new Mechanize HTTP user agent. The user agent is an # implementation detail of mechanize and its API may change at any time. # The connection_name can be used to segregate SSL connections. 
# Agents with different names will not share the same persistent connection. def initialize(connection_name = 'mechanize') @allowed_error_codes = [] @conditional_requests = true @context = nil @content_encoding_hooks = [] @cookie_jar = Mechanize::CookieJar.new @follow_meta_refresh = false @follow_meta_refresh_self = false @gzip_enabled = true @history = Mechanize::History.new @ignore_bad_chunking = false @keep_alive = true @max_file_buffer = 100_000 # 5MB for response bodies @open_timeout = nil @post_connect_hooks = [] @pre_connect_hooks = [] @read_timeout = nil @redirect_ok = true @redirection_limit = 20 @request_headers = {} @robots = false @robots_mutex = Mutex.new @user_agent = nil @webrobots = nil # HTTP Authentication @auth_store = Mechanize::HTTP::AuthStore.new @authenticate_parser = Mechanize::HTTP::WWWAuthenticateParser.new @authenticate_methods = Hash.new do |methods, uri| methods[uri] = Hash.new do |realms, auth_scheme| realms[auth_scheme] = [] end end @digest_auth = Net::HTTP::DigestAuth.new @digest_challenges = {} # SSL @pass = nil @scheme_handlers = Hash.new { |h, scheme| h[scheme] = lambda { |link, page| raise Mechanize::UnsupportedSchemeError.new(scheme, link) } } @scheme_handlers['http'] = lambda { |link, page| link } @scheme_handlers['https'] = @scheme_handlers['http'] @scheme_handlers['relative'] = @scheme_handlers['http'] @scheme_handlers['file'] = @scheme_handlers['http'] @http = Net::HTTP::Persistent.new connection_name @http.idle_timeout = 5 @http.keep_alive = 300 end ## # Adds credentials +user+, +pass+ for +uri+. If +realm+ is set the # credentials are used only for that realm. If +realm+ is not set the # credentials become the default for any realm on that URI. # # +domain+ and +realm+ are exclusive as NTLM does not follow RFC 2617. If # +domain+ is given it is only used for NTLM authentication. 
# Delegates to the agent's auth store.
def add_auth uri, user, password, realm = nil, domain = nil
  @auth_store.add_auth uri, user, password, realm, domain
end

##
# USE OF add_default_auth IS NOT RECOMMENDED AS IT MAY EXPOSE PASSWORDS TO
# THIRD PARTIES
#
# Adds credentials +user+, +pass+ as the default authentication credentials.
# If no other credentials are available these will be returned from
# credentials_for.
#
# If +domain+ is given it is only used for NTLM authentication.

def add_default_auth user, password, domain = nil # :nodoc:
  @auth_store.add_default_auth user, password, domain
end

##
# Retrieves +uri+ and parses it into a page or other object according to
# PluggableParser.  If the URI is an HTTP or HTTPS scheme URI the given HTTP
# +method+ is used to retrieve it, along with the HTTP +headers+, request
# +params+ and HTTP +referer+.
#
# The final URI to access is built with +uri+ and +params+, the
# latter of which is formatted into a string using
# Mechanize::Util.build_query_string, which see.
#
# +redirects+ tracks the number of redirects experienced when retrieving the
# page.  If it is over the redirection_limit an error will be raised.
#
# What is returned depends on the response: the parsed page for a successful
# or file response, the page from the history (or the parsed page) for a 304,
# and the result of following the redirect, meta refresh or authentication
# challenge for those responses.  Any other response raises
# Mechanize::ResponseCodeError unless its code is in allowed_error_codes.
#
# Raises Mechanize::RobotsDisallowedError when robots is enabled and either
# robots_allowed? rejects the URI or the parsed page reports noindex.

def fetch uri, method = :get, headers = {}, params = [],
          referer = current_page, redirects = 0
  referer_uri = referer ? referer.uri : nil

  uri = resolve uri, referer

  uri, params = resolve_parameters uri, method, params

  request = http_request uri, method, params

  connection = connection_for uri

  # Prepare the request.  Caller-supplied headers are added after the agent's
  # own, and the pre-connect hooks see the finished request.
  request_auth             request, uri

  disable_keep_alive       request
  enable_gzip              request

  request_language_charset request
  request_cookies          request, uri
  request_host             request, uri
  request_referer          request, uri, referer_uri
  request_user_agent       request
  request_add_headers      request, headers

  pre_connect              request

  # Consult robots.txt
  if robots && uri.is_a?(URI::HTTP)
    robots_allowed?(uri) or raise Mechanize::RobotsDisallowedError.new(uri)
  end

  # Add If-Modified-Since if page is in history
  # (the trailing modifier skips this entirely when conditional requests are
  # turned off)
  if page = visited_page(uri) and last_modified = page.response['Last-Modified']
    request['If-Modified-Since'] = last_modified
  end if @conditional_requests

  # Specify timeouts if supplied and our connection supports them
  if @open_timeout && connection.respond_to?(:open_timeout=)
    connection.open_timeout = @open_timeout
  end
  if @read_timeout && connection.respond_to?(:read_timeout=)
    connection.read_timeout = @read_timeout
  end

  request_log request

  response_body_io = nil

  # Send the request
  begin
    response = connection.request(uri, request) { |res|
      response_log res

      response_body_io = response_read res, request, uri

      res
    }
  rescue Mechanize::ChunkedTerminationError => e
    raise unless @ignore_bad_chunking

    # keep what was received before the chunked transfer broke off
    response = e.response
    response_body_io = e.body_io
  end

  hook_content_encoding response, uri, response_body_io

  response_body_io = response_content_encoding response, response_body_io if request.response_body_permitted?

  post_connect uri, response, response_body_io

  page = response_parse response, response_body_io, uri

  response_cookies response, uri, page

  # a followed meta refresh replaces the result of this fetch
  meta = response_follow_meta_refresh response, uri, page, redirects
  return meta if meta

  if robots && page.is_a?(Mechanize::Page)
    page.parser.noindex? and raise Mechanize::RobotsDisallowedError.new(uri)
  end

  case response
  when Net::HTTPSuccess
    page
  when Mechanize::FileResponse
    page
  when Net::HTTPNotModified
    log.debug("Got cached page") if log
    visited_page(uri) || page
  when Net::HTTPRedirection
    response_redirect response, method, page, redirects, headers, referer
  when Net::HTTPUnauthorized
    response_authenticate(response, page, uri, request, headers, params,
                          referer)
  else
    # allowed_error_codes may hold Strings or Integers, hence the to_s
    if @allowed_error_codes.any? {|code| code.to_s == page.code} then
      page
    else
      raise Mechanize::ResponseCodeError.new(page, 'unhandled response')
    end
  end
end

# URI for a proxy connection
def proxy_uri
  @http.proxy_uri
end

# Retry non-idempotent requests?
def retry_change_requests
  @http.retry_change_requests
end

# Retry non-idempotent requests
def retry_change_requests= retri
  @http.retry_change_requests = retri
end

# :section: Headers

# Sets the User-Agent header to send.  The cached @webrobots object is
# dropped when the value changes.
def user_agent= user_agent
  @webrobots = nil if user_agent != @user_agent
  @user_agent = user_agent
end

# :section: History

# Equivalent to the browser back button.  Returns the most recent page
# visited.
def back
  @history.pop
end

##
# Returns the latest page loaded by the agent

def current_page
  @history.last
end

# Returns the maximum size for the history stack.
def max_history
  @history.max_size
end

# Set the maximum size for the history stack.
def max_history=(length)
  @history.max_size = length
end

# Returns a visited page for the url passed in, otherwise nil
def visited_page url
  @history.visited_page resolve url
end

# :section: Hooks

# Calls each content-encoding hook with the agent, the +uri+, the +response+
# and the +response_body_io+.
def hook_content_encoding response, uri, response_body_io
  @content_encoding_hooks.each do |hook|
    hook.call self, uri, response, response_body_io
  end
end

##
# Invokes hooks added to post_connect_hooks after a +response+ is returned
# and the response +body+ is handled.
#
# Yields the +context+, the +uri+ for the request, the +response+ and the
# response +body+.
def post_connect uri, response, body_io # :yields: agent, uri, response, body
  @post_connect_hooks.each { |callback|
    begin
      # Every hook receives the whole body as a String.
      body = body_io.read
      callback.call self, uri, response, body
    ensure
      # Rewind even when a hook raises so the next reader starts at offset 0.
      body_io.rewind
    end
  }
end

##
# Runs every hook registered in pre_connect_hooks before a +request+ is
# sent.  Each hook is called with this agent and the +request+.

def pre_connect request # :yields: agent, request
  @pre_connect_hooks.each { |callback| callback.call(self, request) }
end

# :section: Request

##
# Picks the connection object for +uri+ by scheme: the shared HTTP
# connection for http/https, a fresh Mechanize::FileConnection for file.
# Any other scheme yields nil.

def connection_for uri
  scheme = uri.scheme.downcase

  if scheme == 'http' || scheme == 'https'
    @http
  elsif scheme == 'file'
    Mechanize::FileConnection.new
  end
end

# Closes all open connections for this agent.
def shutdown
  http.shutdown
end

##
# Decodes a gzip-encoded +body_io+.  When gunzip fails, a raw inflate of
# the same data is attempted; if that fails too the Zlib error is re-raised.
# +body_io+ is always closed before returning.

def content_encoding_gunzip body_io
  log.debug('gzip response') if log

  gzip_reader = Zlib::GzipReader.new body_io
  decoded_io  = auto_io 'mechanize-gunzip', 16384, gzip_reader
  gzip_reader.finish

  return decoded_io
rescue Zlib::Error => gunzip_failure
  if log
    log.warn "unable to gunzip response: #{gunzip_failure} (#{gunzip_failure.class})"
  end

  # Start over and consume the first 10 bytes before handing the rest to
  # the raw inflater (presumably the fixed-size gzip header -- confirm).
  body_io.rewind
  body_io.read 10

  begin
    log.warn "trying raw inflate on response" if log
    return inflate body_io, -Zlib::MAX_WBITS
  rescue Zlib::Error => inflate_failure
    if log
      log.error "unable to inflate response: #{inflate_failure} (#{inflate_failure.class})"
    end
    raise
  end
ensure
  # do not close a second time if we failed the first time
  gzip_reader.close if gzip_reader && !gzip_reader.closed? && !gunzip_failure
  body_io.close unless body_io.closed?
end

##
# Decodes a deflate-encoded +body_io+.  If it cannot be decoded, raw inflate
# is tried followed by raising an error.
def content_encoding_inflate body_io
  log.debug('deflate body') if log

  # First attempt: let zlib decode the stream with its default settings.
  return inflate body_io
rescue Zlib::Error
  log.error('unable to inflate response, trying raw deflate') if log

  body_io.rewind

  begin
    # Second attempt from the start of the body with negative window bits.
    return inflate body_io, -Zlib::MAX_WBITS
  rescue Zlib::Error => e
    log.error("unable to inflate response: #{e}") if log
    raise
  end
ensure
  body_io.close
end

##
# Asks the server to close the connection unless keep-alive is enabled.

def disable_keep_alive request
  request['connection'] = 'close' unless @keep_alive
end

##
# Sets Accept-Encoding on +request+: gzip, deflate and identity when gzip
# support is enabled, identity only otherwise.

def enable_gzip request
  request['accept-encoding'] = if @gzip_enabled
                                 'gzip,deflate,identity'
                               else
                                 'identity'
                               end
end

##
# Builds the request object for +uri+.  For http/https the Net::HTTP request
# class is looked up from +method+ (:get => Net::HTTP::Get) and the first
# element of +params+, when given, becomes the body.  For file URIs a
# Mechanize::FileRequest is returned.  Other schemes yield nil.

def http_request uri, method, params = nil
  case uri.scheme.downcase
  when 'http', 'https' then
    klass = Net::HTTP.const_get(method.to_s.capitalize)

    request = klass.new(uri.request_uri)
    request.body = params.first if params

    request
  when 'file' then
    Mechanize::FileRequest.new uri
  end
end

##
# Copies the agent-wide request headers and then the per-call +headers+ onto
# +request+.  The symbols :etag and :if_modified_since are translated to
# their header names; any other Symbol key raises ArgumentError.

def request_add_headers request, headers = {}
  @request_headers.each do |k,v|
    request[k] = v
  end

  headers.each do |field, value|
    case field
    when :etag              then request["ETag"] = value
    when :if_modified_since then request["If-Modified-Since"] = value
    when Symbol then
      raise ArgumentError, "unknown header symbol #{field}"
    else
      request[field] = value
    end
  end
end

##
# Adds credentials to +request+ when an authentication realm has already
# been recorded for the root of +uri+.  Digest is preferred over IIS digest,
# which is preferred over Basic.

def request_auth request, uri
  # Realms are keyed by the site root without user information.
  base_uri = uri + '/'
  base_uri.user     = nil
  base_uri.password = nil

  schemes = @authenticate_methods[base_uri]

  if realm = schemes[:digest].find { |r| r.uri == base_uri } then
    request_auth_digest request, uri, realm, base_uri, false
  elsif realm = schemes[:iis_digest].find { |r| r.uri == base_uri } then
    request_auth_digest request, uri, realm, base_uri, true
  elsif realm = schemes[:basic].find { |r| r.uri == base_uri } then
    user, password, = @auth_store.credentials_for uri, realm.realm
    request.basic_auth user, password
  end
end

##
# Sets a Digest Authorization header on +request+ using the challenge stored
# for +realm+.  Note that the credentials are written into +uri+ itself
# before the header is computed.

def request_auth_digest request, uri, realm, base_uri, iis
  challenge = @digest_challenges[realm]

  uri.user, uri.password, = @auth_store.credentials_for uri, realm.realm

  auth = @digest_auth.auth_header uri, challenge.to_s, request.method, iis
  request['Authorization'] = auth
end

##
# Adds a Cookie header holding the jar's cookies for +uri+.  Nothing is
# added when the jar has no cookies for it.

def request_cookies request, uri
  return if @cookie_jar.empty? uri

  cookies = @cookie_jar.cookies uri

  return if cookies.empty?

  request.add_field 'Cookie', cookies.join('; ')
end

##
# Sets the Host header.  The port is appended unless it is 80 or 443.

def request_host request, uri
  port = [80, 443].include?(uri.port.to_i) ? nil : uri.port
  host = uri.host

  request['Host'] = [host, port].compact.join ':'
end

##
# Sets fixed Accept-Charset and Accept-Language headers on +request+.

def request_language_charset request
  request['accept-charset']  = 'ISO-8859-1,utf-8;q=0.7,*;q=0.7'
  request['accept-language'] = 'en-us,en;q=0.5'
end

# Log specified headers for the request
def request_log request
  return unless log

  log.info("#{request.class}: #{request.path}")

  request.each_header do |k, v|
    log.debug("request-header: #{k} => #{v}")
  end
end

# Sets a Referer header.  Fragment part is removed as demanded by
# RFC 2616 14.36, and user information part is removed just like
# major browsers do.
def request_referer request, uri, referer
  return unless referer
  # Never disclose an https referer to a non-https destination.
  return if 'https'.casecmp(referer.scheme) == 0 and
            'https'.casecmp(uri.scheme) != 0
  if referer.fragment || referer.user || referer.password
    # Work on a copy so the caller's URI is left untouched.
    referer = referer.dup
    referer.fragment = referer.user = referer.password = nil
  end
  request['Referer'] = referer
end

##
# Sets the User-Agent header when a user agent string is configured.

def request_user_agent request
  request['User-Agent'] = @user_agent if @user_agent
end

##
# Turns +uri+ (a URI, nil, or anything responding to to_s) into an absolute
# URI, resolving relative references against +referer+.
#
# Returns the referer's URI when +uri+ is nil or blank and a referer exists.
# Raises ArgumentError when no absolute URI can be produced or when the
# resulting scheme is not http, https or file.

def resolve(uri, referer = current_page)
  referer_uri = referer && referer.uri
  if uri.is_a?(URI)
    uri = uri.dup
  elsif uri.nil?
    if referer_uri
      return referer_uri
    end
    raise ArgumentError, "absolute URL needed (not nil)"
  else
    url = uri.to_s.strip
    if url.empty?
      if referer_uri
        return referer_uri.dup.tap { |u| u.fragment = nil }
      end
      raise ArgumentError, "absolute URL needed (not #{uri.inspect})"
    end

    # Escape every character outside the range 0..126.
    url.gsub!(/[^#{0.chr}-#{126.chr}]/o) { |match|
      Mechanize::Util.uri_escape(match)
    }

    # Split on existing %XX runs and '#' so that those are kept verbatim and
    # only the text between them is escaped.
    escaped_url = Mechanize::Util.html_unescape(
      url.split(/((?:%[0-9A-Fa-f]{2})+|#)/).each_slice(2).map { |x, y|
        "#{WEBrick::HTTPUtils.escape(x)}#{y}"
      }.join('')
    )

    begin
      uri = URI.parse(escaped_url)
    rescue
      uri = URI.parse(WEBrick::HTTPUtils.escape(escaped_url))
    end
  end

  uri.host = referer_uri.host if referer_uri && URI::HTTP === uri && uri.host.nil?

  scheme = uri.relative? ? 'relative' : uri.scheme.downcase
  uri = @scheme_handlers[scheme].call(uri, referer)

  if referer_uri
    if uri.path.length == 0 && uri.relative?
      uri.path = referer_uri.path
    end
  end

  uri.path = '/' if uri.path.length == 0

  if uri.relative?
    raise ArgumentError, "absolute URL needed (not #{uri})" unless referer_uri

    # Prefer the last usable base recorded on the referer, when it has one.
    if referer.respond_to?(:bases) && referer.parser &&
       (lbase = referer.bases.last) && lbase.uri && lbase.uri.absolute?
      base = lbase
    else
      base = nil
    end

    base = referer_uri + (base ? base.uri : referer_uri)
    # Workaround for URI's bug in that it squashes consecutive
    # slashes.  See #304.
    if uri.path.match(%r{\A(.*?/)(?!/\.\.?(?!/))(/.*)\z}i)
      uri = URI((base + $1).to_s + $2)
    else
      uri = base + uri
    end

    # Strip initial "/.." bits from the path
    uri.path.sub!(/^(\/\.\.)+(?=\/)/, '')
  end

  unless ['http', 'https', 'file'].include?(uri.scheme.downcase)
    raise ArgumentError, "unsupported scheme: #{uri.scheme}"
  end

  uri
end

##
# Like #resolve, but raises Mechanize::Error when a referer that is not a
# file URI would lead to a file URI.

def secure_resolve!(uri, referer = current_page)
  new_uri = resolve(uri, referer)

  if (referer_uri = referer && referer.uri) &&
     referer_uri.scheme != 'file'.freeze &&
     new_uri.scheme == 'file'.freeze
    raise Mechanize::Error, "insecure redirect to a file URI"
  end

  new_uri
end

##
# For :head, :get, :delete and :trace the +parameters+ are appended to the
# query string of +uri+ (mutating it) and [uri, nil] is returned.  For every
# other method [uri, parameters] is returned unchanged.

def resolve_parameters uri, method, parameters
  case method
  when :head, :get, :delete, :trace then
    if parameters and parameters.length > 0
      uri.query ||= ''
      uri.query << '&' if uri.query.length > 0
      uri.query << Mechanize::Util.build_query_string(parameters)
    end

    return uri, nil
  end

  return uri, parameters
end

# :section: Response

##
# Returns [delay, href] for a meta refresh found on +page+ or in the Refresh
# header of +response+, or nil when refreshes are not followed, none is
# present, or the refresh points at the page itself and self-refreshes are
# not followed.  Raises Mechanize::Error for an unparsable Refresh header.

def get_meta_refresh response, uri, page
  return nil unless @follow_meta_refresh

  if page.respond_to?(:meta_refresh) and (redirect = page.meta_refresh.first) then
    [redirect.delay, redirect.href] unless not @follow_meta_refresh_self and redirect.link_self
  elsif refresh = response['refresh']
    delay, href, link_self = Mechanize::Page::MetaRefresh.parse refresh, uri
    raise Mechanize::Error, 'Invalid refresh http header' unless delay
    [delay.to_f, href] unless not @follow_meta_refresh_self and link_self
  end
end

##
# Handles a 401 response: parses the WWW-Authenticate challenges, records the
# realm for the first supported scheme (Digest, then NTLM, then Basic) and
# repeats the request.  Raises Mechanize::UnauthorizedError when the header
# is missing, no credentials are stored, the scheme is unsupported, or the
# same realm has already been tried.

def response_authenticate(response, page, uri, request, headers, params,
                          referer)
  unless www_authenticate = response['www-authenticate'] then
    message = 'WWW-Authenticate header missing in response'
    raise Mechanize::UnauthorizedError.new(page, nil, message)
  end

  challenges = @authenticate_parser.parse www_authenticate

  unless @auth_store.credentials? uri, challenges then
    message = "no credentials found, provide some with #add_auth"
    raise Mechanize::UnauthorizedError.new(page, challenges, message)
  end

  if challenge = challenges.find { |c| c.scheme =~ /^Digest$/i } then
    realm = challenge.realm uri

    auth_scheme = if response['server'] =~ /Microsoft-IIS/ then
                    :iis_digest
                  else
                    :digest
                  end

    existing_realms = @authenticate_methods[realm.uri][auth_scheme]

    # Seeing the same realm again means the credentials were rejected.
    if existing_realms.include? realm
      message = 'Digest authentication failed'
      raise Mechanize::UnauthorizedError.new(page, challenges, message)
    end

    existing_realms << realm
    @digest_challenges[realm] = challenge
  elsif challenge = challenges.find { |c| c.scheme == 'NTLM' } then
    existing_realms = @authenticate_methods[uri + '/'][:ntlm]

    # NOTE(review): +realm+ is never assigned on this branch, so it is nil
    # here and nil is what gets stored in the :ntlm list -- confirm intended.
    if existing_realms.include?(realm) and not challenge.params then
      message = 'NTLM authentication failed'
      raise Mechanize::UnauthorizedError.new(page, challenges, message)
    end

    existing_realms << realm

    if challenge.params then
      # The server sent its message: answer with a type 3 message.
      type_2 = Net::NTLM::Message.decode64 challenge.params

      user, password, domain = @auth_store.credentials_for uri, nil

      type_3 = type_2.response({ :user => user, :password => password,
                                 :domain => domain },
                               { :ntlmv2 => true }).encode64

      headers['Authorization'] = "NTLM #{type_3}"
    else
      # No parameters yet: open the handshake with a type 1 message.
      type_1 = Net::NTLM::Message::Type1.new.encode64
      headers['Authorization'] = "NTLM #{type_1}"
    end
  elsif challenge = challenges.find { |c| c.scheme == 'Basic' } then
    realm = challenge.realm uri

    existing_realms = @authenticate_methods[realm.uri][:basic]

    if existing_realms.include? realm then
      message = 'Basic authentication failed'
      raise Mechanize::UnauthorizedError.new(page, challenges, message)
    end

    existing_realms << realm
  else
    message = 'unsupported authentication scheme'
    raise Mechanize::UnauthorizedError.new(page, challenges, message)
  end

  fetch uri, request.method.downcase.to_sym, headers, params, referer
end

##
# Returns an IO holding the decoded body according to the Content-Encoding
# of +response+.  An empty body is returned as is.  Raises Mechanize::Error
# for an unsupported encoding or when decoding fails.

def response_content_encoding response, body_io
  length = response.content_length ||
    case body_io
    when Tempfile, IO then
      body_io.stat.size
    else
      body_io.length
    end

  return body_io if length.zero?

  out_io = case response['Content-Encoding']
           # 'identity' is advertised in Accept-Encoding by enable_gzip, so a
           # response labelled with it must be accepted as unencoded.
           when nil, 'none', '7bit', 'identity', "" then
             body_io
           when 'deflate' then
             content_encoding_inflate body_io
           when 'gzip', 'x-gzip' then
             content_encoding_gunzip body_io
           else
             raise Mechanize::Error,
               "unsupported content-encoding: #{response['Content-Encoding']}"
           end

  out_io.flush
  out_io.rewind

  out_io
rescue Zlib::Error => e
  message = "error handling content-encoding #{response['Content-Encoding']}:"
  message << " #{e.message} (#{e.class})"
  raise Mechanize::Error, message
ensure
  begin
    # Discard the raw tempfile once the decoded data lives somewhere else.
    if Tempfile === body_io and
       (StringIO === out_io or (out_io and out_io.path != body_io.path)) then
      body_io.close!
    end
  rescue IOError
    # HACK ruby 1.8 raises IOError when closing the stream
  end
end

##
# Stores cookies from <meta http-equiv="Set-Cookie"> elements of +page+ and
# from the Set-Cookie headers of +response+.

def response_cookies response, uri, page
  if Mechanize::Page === page and page.body =~ /Set-Cookie/n
    page.search('//head/meta[@http-equiv="Set-Cookie"]').each do |meta|
      save_cookies(uri, meta['content'])
    end
  end

  header_cookies = response.get_fields 'Set-Cookie'

  return unless header_cookies

  header_cookies.each do |set_cookie|
    save_cookies(uri, set_cookie)
  end
end

##
# Parses +set_cookie+ into the cookie jar for +uri+, logging each saved
# cookie when a logger is available.  Returns [] for a nil value.

def save_cookies(uri, set_cookie)
  return [] if set_cookie.nil?
  if log = log() # reduce method calls
    @cookie_jar.parse(set_cookie, uri, :logger => log) { |c|
      log.debug("saved cookie: #{c}")
      true
    }
  else
    @cookie_jar.parse(set_cookie, uri)
  end
end

##
# Follows a meta refresh when get_meta_refresh reports one: sleeps for the
# delay, records +page+ in the history and fetches the target.  Returns nil
# when there is nothing to follow.  Raises
# Mechanize::RedirectLimitReachedError past the redirection limit.

def response_follow_meta_refresh response, uri, page, redirects
  delay, new_url = get_meta_refresh(response, uri, page)
  return nil unless delay
  new_url = new_url ? secure_resolve!(new_url, page) : uri

  raise Mechanize::RedirectLimitReachedError.new(page, redirects) if redirects + 1 > @redirection_limit

  sleep delay
  @history.push(page, page.uri)
  fetch new_url, :get, {}, [],
        Mechanize::Page.new, redirects + 1
end

##
# Logs the status line and headers of +response+ when a logger is set.

def response_log response
  return unless log

  log.info("status: #{response.class} #{response.http_version} " \
           "#{response.code} #{response.message}")

  response.each_header do |k, v|
    log.debug("response-header: #{k} => #{v}")
  end
end

##
# Hands the response and its body to the context's parser.

def response_parse response, body_io, uri
  @context.parse uri, response, body_io
end

##
# Reads the body of +response+ into a StringIO, switching to a tempfile once
# use_tempfile? says the size warrants it, and returns the rewound IO.
#
# Raises Mechanize::ChunkedTerminationError for a chunked body that ends
# early after some data, Mechanize::ResponseReadError for connection errors
# or a Content-Length mismatch, and Mechanize::ResponseCodeError for an
# unknown response class.

def response_read response, request, uri
  content_length = response.content_length

  if use_tempfile? content_length then
    body_io = make_tempfile 'mechanize-raw'
  else
    body_io = StringIO.new.set_encoding(Encoding::BINARY)
  end

  total = 0

  begin
    response.read_body { |part|
      total += part.length

      # Spill what has been buffered so far into a tempfile once the running
      # total crosses the threshold.
      if StringIO === body_io and use_tempfile? total then
        new_io = make_tempfile 'mechanize-raw'

        new_io.write body_io.string

        body_io = new_io
      end

      body_io.write(part)
      log.debug("Read #{part.length} bytes (#{total} total)") if log
    }
  rescue EOFError => e
    # terminating CRLF might be missing, let the user check the document
    raise unless response.chunked? and total.nonzero?

    body_io.rewind
    raise Mechanize::ChunkedTerminationError.new(e, response, body_io, uri,
                                                  @context)
  rescue Net::HTTP::Persistent::Error, Errno::ECONNRESET => e
    body_io.rewind
    raise Mechanize::ResponseReadError.new(e, response, body_io, uri,
                                            @context)
  end

  body_io.flush
  body_io.rewind

  raise Mechanize::ResponseCodeError.new(response, uri) if Net::HTTPUnknownResponse === response

  content_length = response.content_length

  # HEAD requests and redirections are exempt from the length check.
  unless Net::HTTP::Head === request or Net::HTTPRedirection === response then
    if content_length and content_length != body_io.length
      err = EOFError.new("Content-Length (#{content_length}) does not " \
                         "match response body length (#{body_io.length})")
      raise Mechanize::ResponseReadError.new(err, response, body_io, uri,
                                              @context)
    end
  end

  body_io
end

##
# Follows the Location of a redirect +response+ according to the redirect_ok
# setting.  Returns +page+ untouched when redirects are not to be followed;
# otherwise records +page+ in the history and fetches the target with :get
# (or :head when the original +method+ was :head).
#
# The entity headers of the original request are removed from +headers+ in
# place, whatever capitalization the caller used for them.
#
# Raises Mechanize::RedirectLimitReachedError past the redirection limit.

def response_redirect(response, method, page, redirects, headers,
                      referer = current_page)
  case @redirect_ok
  when true, :all
    # shortcut
  when false, nil
    return page
  when :permanent
    return page unless Net::HTTPMovedPermanently === response
  end

  log.info("follow redirect to: #{response['Location']}") if log

  raise Mechanize::RedirectLimitReachedError.new(page, redirects) if redirects + 1 > @redirection_limit

  redirect_method = method == :head ? :head : :get

  # Make sure we are not copying over the POST headers from the original
  # request.  HTTP header names are case-insensitive, so compare in lower
  # case instead of relying on one particular spelling.
  entity_headers = %w[content-length content-md5 content-type]
  headers.delete_if { |name, _| entity_headers.include?(name.to_s.downcase) }

  new_uri = secure_resolve! response['Location'].to_s, page

  @history.push(page, page.uri)

  fetch new_uri, redirect_method, headers, [], referer, redirects + 1
end

# :section: Robots

# Thread-local flag set while robots.txt itself is being fetched.
RobotsKey = :__mechanize_get_robots__

def get_robots(uri) # :nodoc:
  robots_mutex.synchronize do
    Thread.current[RobotsKey] = true
    begin
      fetch(uri).body
    rescue Mechanize::ResponseCodeError => e
      # A 4xx answer is treated as an empty robots.txt.
      case e.response_code
      when /\A4\d\d\z/
        ''
      else
        raise e
      end
    rescue Mechanize::RedirectLimitReachedError
      ''
    ensure
      Thread.current[RobotsKey] = false
    end
  end
end

##
# Enables or disables robots.txt handling.  Loads the webrobots library when
# enabling and drops the cached WebRobots instance when the value changes.

def robots= value
  require 'webrobots' if value
  @webrobots = nil if value != @robots
  @robots = value
end

##
# Tests if this agent is allowed to access +url+, consulting the site's
# robots.txt.

def robots_allowed? uri
  # The robots.txt fetch itself must never be blocked.
  return true if Thread.current[RobotsKey]

  webrobots.allowed? uri
end

# Opposite of robots_allowed?

def robots_disallowed? url
  !robots_allowed? url
end

# Returns an error object if there is an error in fetching or parsing
# robots.txt of the site +url+.
def robots_error(url)
  webrobots.error(url)
end

# Raises the error if there is an error in fetching or parsing robots.txt of
# the site +url+.
def robots_error!(url)
  webrobots.error!(url)
end

# Removes robots.txt cache for the site +url+.
def robots_reset(url)
  webrobots.reset(url)
end

##
# The memoized WebRobots instance, which fetches robots.txt via get_robots.

def webrobots
  @webrobots ||= WebRobots.new(@user_agent, :http_get => method(:get_robots))
end

# :section: SSL

# Path to an OpenSSL CA certificate file
def ca_file
  @http.ca_file
end

# Sets the path to an OpenSSL CA certificate file
def ca_file= ca_file
  @http.ca_file = ca_file
end

# The SSL certificate store used for validating connections
def cert_store
  @http.cert_store
end

# Sets the SSL certificate store used for validating connections
def cert_store= cert_store
  @http.cert_store = cert_store
end

# The client X509 certificate
def certificate
  @http.certificate
end

# Sets the client certificate to given X509 certificate.  If a path is given
# the certificate will be loaded and set.
def certificate= certificate certificate = if OpenSSL::X509::Certificate === certificate then certificate else OpenSSL::X509::Certificate.new File.read certificate end @http.certificate = certificate end # An OpenSSL private key or the path to a private key def private_key @http.private_key end # Sets the client's private key def private_key= private_key private_key = if OpenSSL::PKey::PKey === private_key then private_key else OpenSSL::PKey::RSA.new File.read(private_key), @pass end @http.private_key = private_key end # SSL version to use def ssl_version @http.ssl_version end # Sets the SSL version to use def ssl_version= ssl_version @http.ssl_version = ssl_version end # A callback for additional certificate verification. See # OpenSSL::SSL::SSLContext#verify_callback # # The callback can be used for debugging or to ignore errors by always # returning +true+. Specifying nil uses the default method that was valid # when the SSLContext was created def verify_callback @http.verify_callback end # Sets the certificate verify callback def verify_callback= verify_callback @http.verify_callback = verify_callback end # How to verify SSL connections. Defaults to VERIFY_PEER def verify_mode @http.verify_mode end # Sets the mode for verifying SSL connections def verify_mode= verify_mode @http.verify_mode = verify_mode end # :section: Timeouts # Reset connections that have not been used in this many seconds def idle_timeout @http.idle_timeout end # Sets the connection idle timeout for persistent connections def idle_timeout= timeout @http.idle_timeout = timeout end # :section: Utility ## # Creates a new output IO by reading +input_io+ in +read_size+ chunks. If # the output is over the max_file_buffer size a Tempfile with +name+ is # created. # # If a block is provided, each chunk of +input_io+ is yielded for further # processing. def auto_io name, read_size, input_io out_io = StringIO.new.set_encoding(Encoding::BINARY) until input_io.eof? 
do if StringIO === out_io and use_tempfile? out_io.size then new_io = make_tempfile name new_io.write out_io.string out_io = new_io end chunk = input_io.read read_size chunk = yield chunk if block_given? out_io.write chunk end out_io.rewind out_io end def inflate compressed, window_bits = nil inflate = Zlib::Inflate.new window_bits out_io = auto_io 'mechanize-inflate', 1024, compressed do |chunk| inflate.inflate chunk end inflate.finish out_io ensure inflate.close if inflate.finished? end def log @context.log end ## # Sets the proxy address, port, user, and password +addr+ should be a host, # with no "http://", +port+ may be a port number, service name or port # number string. def set_proxy addr, port, user = nil, pass = nil unless addr and port then @http.proxy = nil return end unless Integer === port then begin port = Socket.getservbyname port rescue SocketError begin port = Integer port rescue ArgumentError raise ArgumentError, "invalid value for port: #{port.inspect}" end end end proxy_uri = URI "http://#{addr}" proxy_uri.port = port proxy_uri.user = user if user proxy_uri.password = pass if pass @http.proxy = proxy_uri end def make_tempfile name io = Tempfile.new name io.unlink io.binmode io end def use_tempfile? size return false unless @max_file_buffer return false unless size size >= @max_file_buffer end def reset @cookie_jar.clear @history.clear end end require 'mechanize/http/auth_store' mechanize-2.7.5/lib/mechanize/http/content_disposition_parser.rb0000644000004100000410000000774712772546476025252 0ustar www-datawww-data# coding: BINARY require 'strscan' require 'time' class Mechanize::HTTP ContentDisposition = Struct.new :type, :filename, :creation_date, :modification_date, :read_date, :size, :parameters end ## # Parser Content-Disposition headers that loosely follows RFC 2183. 
# # Beyond RFC 2183, this parser allows: # # * Missing disposition-type # * Multiple semicolons # * Whitespace around semicolons class Mechanize::HTTP::ContentDispositionParser attr_accessor :scanner # :nodoc: @parser = nil ## # Parses the disposition type and params in the +content_disposition+ # string. The "Content-Disposition:" must be removed. def self.parse content_disposition @parser ||= self.new @parser.parse content_disposition end ## # Creates a new parser Content-Disposition headers def initialize @scanner = nil end ## # Parses the +content_disposition+ header. If +header+ is set to true the # "Content-Disposition:" portion will be parsed def parse content_disposition, header = false return nil if content_disposition.empty? @scanner = StringScanner.new content_disposition if header then return nil unless @scanner.scan(/Content-Disposition/i) return nil unless @scanner.scan(/:/) spaces end type = rfc_2045_token @scanner.scan(/;+/) if @scanner.peek(1) == '=' then @scanner.pos = 0 type = nil end disposition = Mechanize::HTTP::ContentDisposition.new type spaces return nil unless parameters = parse_parameters disposition.filename = parameters.delete 'filename' disposition.creation_date = parameters.delete 'creation-date' disposition.modification_date = parameters.delete 'modification-date' disposition.read_date = parameters.delete 'read-date' disposition.size = parameters.delete 'size' disposition.parameters = parameters disposition end ## # Extracts disposition-parm and returns a Hash. def parse_parameters parameters = {} while true do return nil unless param = rfc_2045_token param.downcase! return nil unless @scanner.scan(/=/) value = case param when /^filename$/ then rfc_2045_value when /^(creation|modification|read)-date$/ then Time.rfc822 rfc_2045_quoted_string when /^size$/ then rfc_2045_value.to_i(10) else rfc_2045_value end return nil unless value parameters[param] = value spaces break if @scanner.eos? 
or not @scanner.scan(/;+/) spaces end parameters end ## # quoted-string = <"> *(qtext/quoted-pair) <"> # qtext = , "\" & CR, # and including linear-white-space # quoted-pair = "\" CHAR # # Parses an RFC 2045 quoted-string def rfc_2045_quoted_string return nil unless @scanner.scan(/"/) text = '' while true do chunk = @scanner.scan(/[\000-\014\016-\041\043-\133\135-\177]+/) # not \r " if chunk then text << chunk if @scanner.peek(1) == '\\' then @scanner.get_byte return nil if @scanner.eos? text << @scanner.get_byte elsif @scanner.scan(/\r\n[\t ]+/) then text << " " end else if '"' == @scanner.peek(1) then @scanner.get_byte break else return nil end end end text end ## # token := 1* # # Parses an RFC 2045 token def rfc_2045_token @scanner.scan(/[^\000-\037\177()<>@,;:\\"\/\[\]?= ]+/) end ## # value := token / quoted-string # # Parses an RFC 2045 value def rfc_2045_value if @scanner.peek(1) == '"' then rfc_2045_quoted_string else rfc_2045_token end end ## # 1*SP # # Parses spaces def spaces @scanner.scan(/ +/) end end mechanize-2.7.5/lib/mechanize/http/www_authenticate_parser.rb0000644000004100000410000000640712772546476024526 0ustar www-datawww-data# coding: BINARY require 'strscan' ## # Parses the WWW-Authenticate HTTP header into separate challenges. class Mechanize::HTTP::WWWAuthenticateParser attr_accessor :scanner # :nodoc: ## # Creates a new header parser for WWW-Authenticate headers def initialize @scanner = nil end ## # Parsers the header. Returns an Array of challenges as strings def parse www_authenticate challenges = [] @scanner = StringScanner.new www_authenticate while true do break if @scanner.eos? 
start = @scanner.pos challenge = Mechanize::HTTP::AuthChallenge.new scheme = auth_scheme if scheme == 'Negotiate' scan_comma_spaces end next unless scheme challenge.scheme = scheme space = spaces if scheme == 'NTLM' then if space then challenge.params = @scanner.scan(/.*/) end challenge.raw = www_authenticate[start, @scanner.pos] challenges << challenge next else scheme.capitalize! end next unless space params = {} while true do pos = @scanner.pos name, value = auth_param name.downcase! if name =~ /^realm$/i unless name then challenge.params = params challenges << challenge if @scanner.eos? then challenge.raw = www_authenticate[start, @scanner.pos] break end @scanner.pos = pos # rewind challenge.raw = www_authenticate[start, @scanner.pos].sub(/(,+)? *$/, '') challenge = nil # a token should be next, new challenge break else params[name] = value end spaces @scanner.scan(/(, *)+/) end end challenges end ## # 1*SP # # Parses spaces def spaces @scanner.scan(/ +/) end ## # scans a comma followed by spaces # needed for Negotiation, NTLM # def scan_comma_spaces @scanner.scan(/, +/) end ## # token = 1* # # Parses a token def token @scanner.scan(/[^\000-\037\177()<>@,;:\\"\/\[\]?={} ]+/) end ## # auth-scheme = token # # Parses an auth scheme (a token) alias auth_scheme token ## # auth-param = token "=" ( token | quoted-string ) # # Parses an auth parameter def auth_param return nil unless name = token return nil unless @scanner.scan(/ *= */) value = if @scanner.peek(1) == '"' then quoted_string else token end return nil unless value return name, value end ## # quoted-string = ( <"> *(qdtext | quoted-pair ) <"> ) # qdtext = > # quoted-pair = "\" CHAR # # For TEXT, the rules of RFC 2047 are ignored. def quoted_string return nil unless @scanner.scan(/"/) text = '' while true do chunk = @scanner.scan(/[\r\n \t\041\043-\176\200-\377]+/) # not " if chunk then text << chunk text << @scanner.get_byte if chunk.end_with? 
'\\' and '"' == @scanner.peek(1) else if '"' == @scanner.peek(1) then @scanner.get_byte break else return nil end end end text end end mechanize-2.7.5/lib/mechanize/http/auth_realm.rb0000644000004100000410000000102012772546476021673 0ustar www-datawww-dataclass Mechanize::HTTP::AuthRealm attr_reader :scheme attr_reader :uri attr_reader :realm def initialize scheme, uri, realm @scheme = scheme @uri = uri @realm = realm if realm end def == other self.class === other and @scheme == other.scheme and @uri == other.uri and @realm == other.realm end alias eql? == def hash # :nodoc: [@scheme, @uri, @realm].hash end def inspect # :nodoc: "#" % [@scheme, @uri, @realm] end end mechanize-2.7.5/lib/mechanize/http/auth_challenge.rb0000644000004100000410000000241312772546476022524 0ustar www-datawww-dataclass Mechanize::HTTP AuthChallenge = Struct.new :scheme, :params, :raw ## # A parsed WWW-Authenticate header class AuthChallenge ## # :attr_accessor: scheme # # The authentication scheme ## # :attr_accessor: params # # The authentication parameters ## # :method: initialize # # :call-seq: # initialize(scheme = nil, params = nil) # # Creates a new AuthChallenge header with the given scheme and parameters ## # Retrieves +param+ from the params list def [] param params[param] end ## # Constructs an AuthRealm for this challenge def realm uri case scheme when 'Basic' then raise ArgumentError, "provide uri for Basic authentication" unless uri Mechanize::HTTP::AuthRealm.new scheme, uri + '/', self['realm'] when 'Digest' then Mechanize::HTTP::AuthRealm.new scheme, uri + '/', self['realm'] else raise Mechanize::Error, "unknown HTTP authentication scheme #{scheme}" end end ## # The name of the realm for this challenge def realm_name params['realm'] if Hash === params # NTLM has a string for params end ## # The raw authentication challenge alias to_s raw end end mechanize-2.7.5/lib/mechanize/form/0000755000004100000410000000000012772546476017220 5ustar 
www-datawww-datamechanize-2.7.5/lib/mechanize/form/radio_button.rb0000644000004100000410000000221712772546476022240 0ustar www-datawww-data## # This class represents a radio button found in a Form. To activate the # RadioButton in the Form, set the checked method to true. class Mechanize::Form::RadioButton < Mechanize::Form::Field attr_accessor :checked attr_reader :form def initialize node, form @checked = !!node['checked'] @form = form super(node) end def == other # :nodoc: self.class === other and other.form == @form and other.name == @name and other.value == @value end alias eql? == # :nodoc: def check uncheck_peers @checked = true end alias checked? checked def uncheck @checked = false end def click checked ? uncheck : check end def hash # :nodoc: @form.hash ^ @name.hash ^ @value.hash end def label (id = self['id']) && @form.page.labels_hash[id] || nil end def text label.text rescue nil end def [](key) @node[key] end def pretty_print_instance_variables # :nodoc: [:@checked, :@name, :@value] end private def uncheck_peers @form.radiobuttons_with(:name => name).each do |b| next if b.value == value b.uncheck end end end mechanize-2.7.5/lib/mechanize/form/hidden.rb0000644000004100000410000000007412772546476021001 0ustar www-datawww-dataclass Mechanize::Form::Hidden < Mechanize::Form::Field end mechanize-2.7.5/lib/mechanize/form/keygen.rb0000644000004100000410000000173012772546476021030 0ustar www-datawww-data## # This class represents a keygen (public / private key generator) found in a # Form. The field will automatically generate a key pair and compute its own # value to match the challenge. Call key to access the public/private key # pair. class Mechanize::Form::Keygen < Mechanize::Form::Field # The challenge for this . attr_reader :challenge # The key associated with this tag. 
attr_reader :key def initialize(node, value = nil) super @challenge = node['challenge'] @spki = OpenSSL::Netscape::SPKI.new @spki.challenge = @challenge @key = nil generate_key if value.nil? || value.empty? end # Generates a key pair and sets the field's value. def generate_key(key_size = 2048) # Spec at http://dev.w3.org/html5/spec/Overview.html#the-keygen-element @key = OpenSSL::PKey::RSA.new key_size @spki.public_key = @key.public_key @spki.sign @key, OpenSSL::Digest::MD5.new self.value = @spki.to_pem end end mechanize-2.7.5/lib/mechanize/form/reset.rb0000644000004100000410000000007412772546476020670 0ustar www-datawww-dataclass Mechanize::Form::Reset < Mechanize::Form::Button end mechanize-2.7.5/lib/mechanize/form/submit.rb0000644000004100000410000000007512772546476021052 0ustar www-datawww-dataclass Mechanize::Form::Submit < Mechanize::Form::Button end mechanize-2.7.5/lib/mechanize/form/text.rb0000644000004100000410000000007212772546476020530 0ustar www-datawww-dataclass Mechanize::Form::Text < Mechanize::Form::Field end mechanize-2.7.5/lib/mechanize/form/button.rb0000644000004100000410000000013412772546476021056 0ustar www-datawww-data## # A Submit button in a Form class Mechanize::Form::Button < Mechanize::Form::Field end mechanize-2.7.5/lib/mechanize/form/textarea.rb0000644000004100000410000000007612772546476021365 0ustar www-datawww-dataclass Mechanize::Form::Textarea < Mechanize::Form::Field end mechanize-2.7.5/lib/mechanize/form/image_button.rb0000644000004100000410000000061412772546476022223 0ustar www-datawww-data## # This class represents an image button in a form. Use the x and y methods to # set the x and y positions for where the mouse "clicked". 
class Mechanize::Form::ImageButton < Mechanize::Form::Button attr_accessor :x, :y def initialize *args @x = nil @y = nil super end def query_value [["#{@name}.x", (@x || 0).to_s], ["#{@name}.y", (@y || 0).to_s]] end end mechanize-2.7.5/lib/mechanize/form/file_upload.rb0000644000004100000410000000117012772546476022027 0ustar www-datawww-data# This class represents a file upload field found in a form. To use this # class, set FileUpload#file_data= to the data of the file you want to upload # and FileUpload#mime_type= to the appropriate mime type of the file. # # See the example in EXAMPLES class Mechanize::Form::FileUpload < Mechanize::Form::Field attr_accessor :file_name # File name attr_accessor :mime_type # Mime Type (Optional) alias :file_data :value alias :file_data= :value= def initialize node, file_name @file_name = Mechanize::Util.html_unescape(file_name) @file_data = nil @node = node super(node, @file_data) end end mechanize-2.7.5/lib/mechanize/form/multi_select_list.rb0000644000004100000410000000360312772546476023273 0ustar www-datawww-data## # This class represents a select list where multiple values can be selected. # MultiSelectList#value= accepts an array, and those values are used as # values for the select list. 
For example, to select multiple values, # simply do this: # # list.value = ['one', 'two'] # # Single values are still supported, so these two are the same: # # list.value = ['one'] # list.value = 'one' class Mechanize::Form::MultiSelectList < Mechanize::Form::Field extend Mechanize::ElementMatcher attr_accessor :options def initialize node value = [] @options = [] # parse node.search('option').each do |n| @options << Mechanize::Form::Option.new(n, self) end super node, value end ## # :method: option_with # # Find one option on this select list with +criteria+ # # Example: # # select_list.option_with(:value => '1').value = 'foo' ## # :mehtod: option_with!(criteria) # # Same as +option_with+ but raises an ElementNotFoundError if no button # matches +criteria+ ## # :method: options_with # # Find all options on this select list with +criteria+ # # Example: # # select_list.options_with(:value => /1|2/).each do |field| # field.value = '20' # end elements_with :option def query_value value ? value.map { |v| [name, v] } : '' end # Select no options def select_none @value = [] options.each(&:untick) end # Select all options def select_all @value = [] options.each(&:tick) end # Get a list of all selected options def selected_options @options.find_all(&:selected?) end def value=(values) select_none [values].flatten.each do |value| option = options.find { |o| o.value == value } if option.nil? @value.push(value) else option.select end end end def value @value + selected_options.map(&:value) end end mechanize-2.7.5/lib/mechanize/form/field.rb0000644000004100000410000000331212772546476020627 0ustar www-datawww-data## # This class represents a field in a form. 
It handles the following input # tags found in a form: # # * text # * password # * hidden # * int # * textarea # * keygen # # To set the value of a field, just use the value method: # # field.value = "foo" class Mechanize::Form::Field attr_accessor :name, :value, :node, :type # This fields value before it's sent through Util.html_unescape. attr_reader :raw_value # index is used to maintain order for fields with Hash nodes attr_accessor :index def initialize node, value = node['value'] @node = node @name = Mechanize::Util.html_unescape(node['name']) @raw_value = value @value = if value.is_a? String Mechanize::Util.html_unescape(value) else value end @type = node['type'] end def query_value [[@name, @value || '']] end def <=> other return 0 if self == other # If both are hashes, sort by index if Hash === node && Hash === other.node && index return index <=> other.index end # Otherwise put Hash based fields at the end return 1 if Hash === node return -1 if Hash === other.node # Finally let nokogiri determine sort order node <=> other.node end # This method is a shortcut to get field's DOM id. # Common usage: form.field_with(:dom_id => "foo") def dom_id node['id'] end # This method is a shortcut to get field's DOM class. # Common usage: form.field_with(:dom_class => "foo") def dom_class node['class'] end def inspect # :nodoc: "[%s:0x%x type: %s name: %s value: %s]" % [ self.class.name.sub(/Mechanize::Form::/, '').downcase, object_id, type, name, value ] end end mechanize-2.7.5/lib/mechanize/form/option.rb0000644000004100000410000000214512772546476021057 0ustar www-datawww-data## # This class contains an option found within SelectList. A SelectList can # have many Option classes associated with it. An option can be selected by # calling Option#tick, or Option#click. # # To select the first option in a list: # # select_list.first.tick class Mechanize::Form::Option attr_reader :value, :selected, :text, :select_list alias :to_s :value alias :selected? 
:selected def initialize(node, select_list) @text = node.inner_text @value = Mechanize::Util.html_unescape(node['value'] || node.inner_text) @selected = node.has_attribute? 'selected' @select_list = select_list # The select list this option belongs to end # Select this option def select unselect_peers @selected = true end # Unselect this option def unselect @selected = false end alias :tick :select alias :untick :unselect # Toggle the selection value of this option def click unselect_peers @selected = !@selected end private def unselect_peers return unless Mechanize::Form::SelectList === @select_list @select_list.select_none end end mechanize-2.7.5/lib/mechanize/form/select_list.rb0000644000004100000410000000204112772546476022054 0ustar www-datawww-data# This class represents a select list or drop down box in a Form. Set the # value for the list by calling SelectList#value=. SelectList contains a list # of Option that were found. After finding the correct option, set the select # lists value to the option value: # # selectlist.value = selectlist.options.first.value # # Options can also be selected by "clicking" or selecting them. See Option class Mechanize::Form::SelectList < Mechanize::Form::MultiSelectList def initialize node super if selected_options.length > 1 selected_options.reverse[1..selected_options.length].each do |o| o.unselect end end end def value value = super if value.length > 0 value.last elsif @options.length > 0 @options.first.value else nil end end def value=(new_value) if new_value != new_value.to_s and new_value.respond_to? :first super([new_value.first]) else super([new_value.to_s]) end end def query_value value ? [[name, value]] : nil end end mechanize-2.7.5/lib/mechanize/form/check_box.rb0000644000004100000410000000066712772546476021503 0ustar www-datawww-data## # This class represents a check box found in a Form. To activate the CheckBox # in the Form, set the checked method to true. 
class Mechanize::Form::CheckBox < Mechanize::Form::RadioButton def query_value [[@name, @value || "on"]] end def inspect # :nodoc: "[%s:0x%x type: %s name: %s value: %s]" % [ self.class.name.sub(/Mechanize::Form::/, '').downcase, object_id, type, name, checked ] end end mechanize-2.7.5/lib/mechanize/cookie.rb0000644000004100000410000000307412772546476020057 0ustar www-datawww-datawarn 'mechanize/cookie will be deprecated. Please migrate to the http-cookie APIs.' if $VERBOSE require 'http/cookie' class Mechanize module CookieDeprecated def __deprecated__(to = nil) $VERBOSE or return method = caller[0][/([^`]+)(?='$)/] to ||= method case self when Class lname = name[/[^:]+$/] klass = 'Mechanize::%s' % lname this = '%s.%s' % [klass, method] that = 'HTTP::%s.%s' % [lname, to] else lname = self.class.name[/[^:]+$/] klass = 'Mechanize::%s' % lname this = '%s#%s' % [klass, method] that = 'HTTP::%s#%s' % [lname, to] end warn '%s: The call of %s needs to be fixed to follow the new API (%s).' % [caller[1], this, that] end private :__deprecated__ end module CookieCMethods include CookieDeprecated def parse(arg1, arg2, arg3 = nil, &block) if arg1.is_a?(URI) __deprecated__ return [] if arg2.nil? 
super(arg2, arg1, { :logger => arg3 }) else super end end end module CookieIMethods include CookieDeprecated def set_domain(domain) __deprecated__ :domain= @domain = domain end end Cookie = ::HTTP::Cookie # Compatibility for Ruby 1.8/1.9 unless Cookie.respond_to?(:prepend, true) require 'mechanize/prependable' class Cookie extend Prependable class << self extend Prependable end end end class Cookie prepend CookieIMethods class << self prepend CookieCMethods end end end mechanize-2.7.5/lib/mechanize/redirect_limit_reached_error.rb0000644000004100000410000000056112772546476024467 0ustar www-datawww-data## # Raised when too many redirects are sent class Mechanize::RedirectLimitReachedError < Mechanize::Error attr_reader :page attr_reader :redirects attr_reader :response_code def initialize page, redirects @page = page @redirects = redirects @response_code = page.code super "Redirect limit of #{redirects} reached" end end mechanize-2.7.5/lib/mechanize/response_read_error.rb0000644000004100000410000000162012772546476022643 0ustar www-datawww-data## # Raised when Mechanize encounters an error while reading the response body # from the server. Contains the response headers and the response body up to # the error along with the initial error. class Mechanize::ResponseReadError < Mechanize::Error attr_reader :body_io attr_reader :error attr_reader :mechanize attr_reader :response attr_reader :uri ## # Creates a new ResponseReadError with the +error+ raised, the +response+ # and the +body_io+ for content read so far. def initialize error, response, body_io, uri, mechanize @body_io = body_io @error = error @mechanize = mechanize @response = response @uri = uri end ## # Converts this error into a Page, File, etc. 
based on the content-type def force_parse @mechanize.parse @uri, @response, @body_io end def message # :nodoc: "#{@error.message} (#{self.class})" end end mechanize-2.7.5/lib/mechanize/unauthorized_error.rb0000644000004100000410000000057412772546476022542 0ustar www-datawww-dataclass Mechanize::UnauthorizedError < Mechanize::ResponseCodeError attr_reader :challenges def initialize page, challenges, message super page, message @challenges = challenges end def to_s out = super if @challenges then realms = @challenges.map(&:realm_name).join ', ' out << " -- available realms: #{realms}" end out end end mechanize-2.7.5/lib/mechanize/xml_file.rb0000644000004100000410000000237312772546476020406 0ustar www-datawww-data## # This class encapsulates an XML file. If Mechanize finds a content-type # of 'text/xml' or 'application/xml' this class will be instantiated and # returned. This class also opens up the +search+ and +at+ methods available # on the underlying Nokogiri::XML::Document object. # # Example: # # require 'mechanize' # # agent = Mechanize.new # xml = agent.get('http://example.org/some-xml-file.xml') # xml.class #=> Mechanize::XmlFile # xml.search('//foo[@attr="bar"]/etc') class Mechanize::XmlFile < Mechanize::File extend Forwardable # The underlying Nokogiri::XML::Document object attr_reader :xml def initialize(uri = nil, response = nil, body = nil, code = nil) super uri, response, body, code @xml = Nokogiri.XML body end ## # :method: search # # Search for +paths+ in the page using Nokogiri's #search. The +paths+ can # be XPath or CSS and an optional Hash of namespaces may be appended. # # See Nokogiri::XML::Node#search for further details. def_delegator :xml, :search, :search ## # :method: at # # Search through the page for +path+ under +namespace+ using Nokogiri's #at. # The +path+ may be either a CSS or XPath expression. 
# # See also Nokogiri::XML::Node#at def_delegator :xml, :at, :at endmechanize-2.7.5/lib/mechanize/util.rb0000644000004100000410000001075112772546476017563 0ustar www-datawww-datarequire 'cgi' require 'nkf' class Mechanize::Util # default mime type data for Page::Image#mime_type. # You can use another Apache-compatible mimetab. # mimetab = WEBrick::HTTPUtils.load_mime_types('/etc/mime.types') # Mechanize::Util::DefaultMimeTypes.replace(mimetab) DefaultMimeTypes = WEBrick::HTTPUtils::DefaultMimeTypes class << self # Builds a query string from a given enumerable object # +parameters+. This method uses Mechanize::Util.each_parameter # as preprocessor, which see. def build_query_string(parameters, enc = nil) each_parameter(parameters).inject(nil) { |s, (k, v)| # WEBrick::HTTP.escape* has some problems about m17n on ruby-1.9.*. (s.nil? ? '' : s << '&') << [CGI.escape(k.to_s), CGI.escape(v.to_s)].join('=') } || '' end # Parses an enumerable object +parameters+ and iterates over the # key-value pairs it contains. # # +parameters+ may be a hash, or any enumerable object which # iterates over [key, value] pairs, typically an array of arrays. # # If a key is paired with an array-like object, the pair is # expanded into multiple occurrences of the key, one for each # element of the array. e.g. { a: [1, 2] } => [:a, 1], [:a, 2] # # If a key is paired with a hash-like object, the pair is expanded # into hash-like multiple pairs, one for each pair of the hash. # e.g. { a: { x: 1, y: 2 } } => ['a[x]', 1], ['a[y]', 2] # # An array-like value is allowed to be specified as hash value. # e.g. { a: { q: [1, 2] } } => ['a[q]', 1], ['a[q]', 2] # # For a non-array-like, non-hash-like value, the key-value pair is # yielded as is. def each_parameter(parameters, &block) return to_enum(__method__, parameters) if block.nil? parameters.each { |key, value| each_parameter_1(key, value, &block) } end private def each_parameter_1(key, value, &block) return if key.nil? 
case when s = String.try_convert(value) yield [key, s] when a = Array.try_convert(value) a.each { |avalue| yield [key, avalue] } when h = Hash.try_convert(value) h.each { |hkey, hvalue| each_parameter_1('%s[%s]' % [key, hkey], hvalue, &block) } else yield [key, value] end end end # Converts string +s+ from +code+ to UTF-8. def self.from_native_charset(s, code, ignore_encoding_error = false, log = nil) return s unless s && code return s unless Mechanize.html_parser == Nokogiri::HTML begin s.encode(code) rescue EncodingError => ex log.debug("from_native_charset: #{ex.class}: form encoding: #{code.inspect} string: #{s}") if log if ignore_encoding_error s else raise end end end def self.html_unescape(s) return s unless s s.gsub(/&(\w+|#[0-9]+);/) { |match| number = case match when /&(\w+);/ Mechanize.html_parser::NamedCharacters[$1] when /&#([0-9]+);/ $1.to_i end number ? ([number].pack('U') rescue match) : match } end case NKF::BINARY when Encoding def self.guess_encoding(src) # NKF.guess of JRuby may return nil NKF.guess(src) || Encoding::US_ASCII end else # Old NKF from 1.8, still bundled with Rubinius NKF_ENCODING_MAP = { NKF::UNKNOWN => Encoding::US_ASCII, NKF::BINARY => Encoding::ASCII_8BIT, NKF::ASCII => Encoding::US_ASCII, NKF::JIS => Encoding::ISO_2022_JP, NKF::EUC => Encoding::EUC_JP, NKF::SJIS => Encoding::Shift_JIS, NKF::UTF8 => Encoding::UTF_8, NKF::UTF16 => Encoding::UTF_16BE, NKF::UTF32 => Encoding::UTF_32BE, } def self.guess_encoding(src) NKF_ENCODING_MAP[NKF.guess(src)] end end def self.detect_charset(src) if src guess_encoding(src).name.upcase else Encoding::ISO8859_1.name end end def self.uri_escape str, unsafe = nil @parser ||= begin URI::Parser.new rescue NameError URI end if URI == @parser then unsafe ||= URI::UNSAFE else unsafe ||= @parser.regexp[:UNSAFE] end @parser.escape str, unsafe end def self.uri_unescape str @parser ||= begin URI::Parser.new rescue NameError URI end @parser.unescape str end end 
mechanize-2.7.5/lib/mechanize/image.rb0000644000004100000410000000016012772546476017661 0ustar www-datawww-data## # An Image holds downloaded data for an image/* response. class Mechanize::Image < Mechanize::Download end mechanize-2.7.5/lib/mechanize.rb0000644000004100000410000010567712772546476016622 0ustar www-datawww-datarequire 'mechanize/version' require 'fileutils' require 'forwardable' require 'mutex_m' require 'net/http/digest_auth' require 'net/http/persistent' require 'nokogiri' require 'openssl' require 'pp' require 'stringio' require 'uri' require 'webrick/httputils' require 'zlib' ## # The Mechanize library is used for automating interactions with a website. It # can follow links and submit forms. Form fields can be populated and # submitted. A history of URLs is maintained and can be queried. # # == Example # # require 'mechanize' # require 'logger' # # agent = Mechanize.new # agent.log = Logger.new "mech.log" # agent.user_agent_alias = 'Mac Safari' # # page = agent.get "http://www.google.com/" # search_form = page.form_with :name => "f" # search_form.field_with(:name => "q").value = "Hello" # # search_results = agent.submit search_form # puts search_results.body # # == Issues with mechanize # # If you think you have a bug with mechanize, but aren't sure, please file a # ticket at https://github.com/sparklemotion/mechanize/issues # # Here are some common problems you may experience with mechanize # # === Problems connecting to SSL sites # # Mechanize defaults to validating SSL certificates using the default CA # certificates for your platform. At this time, Windows users do not have # integration between the OS default CA certificates and OpenSSL. #cert_store # explains how to download and use Mozilla's CA certificates to allow SSL # sites to work. # # === Problems with content-length # # Some sites return an incorrect content-length value. 
Unlike a browser, # mechanize raises an error when the content-length header does not match the # response length since it does not know if there was a connection problem or # if the mismatch is a server bug. # # The error raised, Mechanize::ResponseReadError, can be converted to a parsed # Page, File, etc. depending upon the content-type: # # agent = Mechanize.new # uri = URI 'http://example/invalid_content_length' # # begin # page = agent.get uri # rescue Mechanize::ResponseReadError => e # page = e.force_parse # end class Mechanize ## # Base mechanize error class class Error < RuntimeError end ruby_version = if RUBY_PATCHLEVEL >= 0 then "#{RUBY_VERSION}p#{RUBY_PATCHLEVEL}" else "#{RUBY_VERSION}dev#{RUBY_REVISION}" end ## # Supported User-Agent aliases for use with user_agent_alias=. The # description in parenthesis is for informative purposes and is not part of # the alias name. # # * Linux Firefox (43.0 on Ubuntu Linux) # * Linux Konqueror (3) # * Linux Mozilla # * Mac Firefox (43.0) # * Mac Mozilla # * Mac Safari (9.0 on OS X 10.11.2) # * Mac Safari 4 # * Mechanize (default) # * Windows IE 6 # * Windows IE 7 # * Windows IE 8 # * Windows IE 9 # * Windows IE 10 (Windows 8 64bit) # * Windows IE 11 (Windows 8.1 64bit) # * Windows Edge # * Windows Mozilla # * Windows Firefox (43.0) # * iPhone (iOS 9.1) # * iPad (iOS 9.1) # * Android (5.1.1) # # Example: # # agent = Mechanize.new # agent.user_agent_alias = 'Mac Safari' AGENT_ALIASES = { 'Mechanize' => "Mechanize/#{VERSION} Ruby/#{ruby_version} (http://github.com/sparklemotion/mechanize/)", 'Linux Firefox' => 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:43.0) Gecko/20100101 Firefox/43.0', 'Linux Konqueror' => 'Mozilla/5.0 (compatible; Konqueror/3; Linux)', 'Linux Mozilla' => 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.4) Gecko/20030624', 'Mac Firefox' => 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:43.0) Gecko/20100101 Firefox/43.0', 'Mac Mozilla' => 'Mozilla/5.0 (Macintosh; U; PPC Mac OS X Mach-O; en-US; 
rv:1.4a) Gecko/20030401', 'Mac Safari 4' => 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_2; de-at) AppleWebKit/531.21.8 (KHTML, like Gecko) Version/4.0.4 Safari/531.21.10', 'Mac Safari' => 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_2) AppleWebKit/601.3.9 (KHTML, like Gecko) Version/9.0.2 Safari/601.3.9', 'Windows Chrome' => 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.125 Safari/537.36', 'Windows IE 6' => 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)', 'Windows IE 7' => 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)', 'Windows IE 8' => 'Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; .NET CLR 1.1.4322; .NET CLR 2.0.50727)', 'Windows IE 9' => 'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)', 'Windows IE 10' => 'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.2; WOW64; Trident/6.0)', 'Windows IE 11' => 'Mozilla/5.0 (Windows NT 6.3; WOW64; Trident/7.0; rv:11.0) like Gecko', 'Windows Edge' => 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2486.0 Safari/537.36 Edge/13.10586', 'Windows Mozilla' => 'Mozilla/5.0 (Windows; U; Windows NT 5.0; en-US; rv:1.4b) Gecko/20030516 Mozilla Firebird/0.6', 'Windows Firefox' => 'Mozilla/5.0 (Windows NT 6.3; WOW64; rv:43.0) Gecko/20100101 Firefox/43.0', 'iPhone' => 'Mozilla/5.0 (iPhone; CPU iPhone OS 9_1 like Mac OS X) AppleWebKit/601.1.46 (KHTML, like Gecko) Version/9.0 Mobile/13B5110e Safari/601.1', 'iPad' => 'Mozilla/5.0 (iPad; CPU OS 9_1 like Mac OS X) AppleWebKit/601.1.46 (KHTML, like Gecko) Version/9.0 Mobile/13B143 Safari/601.1', 'Android' => 'Mozilla/5.0 (Linux; Android 5.1.1; Nexus 7 Build/LMY47V) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.76 Safari/537.36', } AGENT_ALIASES.default_proc = proc { |hash, key| case key when /FireFox/ if ua = hash[nkey = key.sub(/FireFox/, 'Firefox')] warn "Mechanize#user_agent_alias: #{key.inspect} 
should be spelled as #{nkey.inspect}" ua end end } def self.inherited(child) # :nodoc: child.html_parser = html_parser child.log = log super end ## # Creates a new Mechanize instance and yields it to the given block. # # After the block executes, the instance is cleaned up. This includes # closing all open connections. # # Mechanize.start do |m| # m.get("http://example.com") # end def self.start instance = new yield(instance) ensure instance.shutdown end ## # Creates a new mechanize instance. If a block is given, the created # instance is yielded to the block for setting up pre-connection state such # as SSL parameters or proxies: # # agent = Mechanize.new do |a| # a.proxy_host = 'proxy.example' # a.proxy_port = 8080 # end # # If you need segregated SSL connections give each agent a unique # name. Otherwise the connections will be shared. This is # particularly important if you are using certifcates. # # agent_1 = Mechanize.new 'conn1' # agent_2 = Mechanize.new 'conn2' # def initialize(connection_name = 'mechanize') @agent = Mechanize::HTTP::Agent.new(connection_name) @agent.context = self @log = nil # attr_accessors @agent.user_agent = AGENT_ALIASES['Mechanize'] @watch_for_set = nil @history_added = nil # attr_readers @pluggable_parser = PluggableParser.new @keep_alive_time = 0 # Proxy @proxy_addr = nil @proxy_port = nil @proxy_user = nil @proxy_pass = nil @html_parser = self.class.html_parser @default_encoding = nil @force_default_encoding = false # defaults @agent.max_history = 50 yield self if block_given? @agent.set_proxy @proxy_addr, @proxy_port, @proxy_user, @proxy_pass end # :section: History # # Methods for navigating and controlling history ## # Equivalent to the browser back button. Returns the previous page visited. 
def back @agent.history.pop end ## # Returns the latest page loaded by Mechanize def current_page @agent.current_page end alias page current_page ## # The history of this mechanize run def history @agent.history end ## # Maximum number of items allowed in the history. The default setting is 50 # pages. Note that the size of the history multiplied by the maximum # response body size def max_history @agent.history.max_size end ## # Sets the maximum number of items allowed in the history to +length+. # # Setting the maximum history length to nil will make the history size # unlimited. Take care when doing this, mechanize stores response bodies in # memory for pages and in the temporary files directory for other responses. # For a long-running mechanize program this can be quite large. # # See also the discussion under #max_file_buffer= def max_history= length @agent.history.max_size = length end ## # Returns a visited page for the +url+ passed in, otherwise nil def visited? url url = url.href if url.respond_to? :href @agent.visited_page url end ## # Returns whether or not a url has been visited alias visited_page visited? # :section: Hooks # # Hooks into the operation of mechanize ## # A list of hooks to call before reading response header 'content-encoding'. # # The hook is called with the agent making the request, the URI of the # request, the response an IO containing the response body. def content_encoding_hooks @agent.content_encoding_hooks end ## # Callback which is invoked with the page that was added to history. attr_accessor :history_added ## # A list of hooks to call after retrieving a response. Hooks are called with # the agent, the URI, the response, and the response body. def post_connect_hooks @agent.post_connect_hooks end ## # A list of hooks to call before retrieving a response. Hooks are called # with the agent, the URI, the response, and the response body. 
def pre_connect_hooks @agent.pre_connect_hooks end # :section: Requests # # Methods for making HTTP requests ## # If the parameter is a string, finds the button or link with the # value of the string on the current page and clicks it. Otherwise, clicks # the Mechanize::Page::Link object passed in. Returns the page fetched. def click link case link when Page::Link then referer = link.page || current_page() if @agent.robots if (referer.is_a?(Page) and referer.parser.nofollow?) or link.rel?('nofollow') then raise RobotsDisallowedError.new(link.href) end end if link.noreferrer? href = @agent.resolve(link.href, link.page || current_page) referer = Page.new else href = link.href end get href, [], referer when String, Regexp then if real_link = page.link_with(:text => link) click real_link else button = nil # Note that this will not work if we have since navigated to a different page. # Should rather make each button aware of its parent form. form = page.forms.find do |f| button = f.button_with(:value => link) button.is_a? Form::Submit end submit form, button if form end when Form::Submit, Form::ImageButton then # Note that this will not work if we have since navigated to a different page. # Should rather make each button aware of its parent form. form = page.forms.find do |f| f.buttons.include?(link) end submit form, link if form else referer = current_page() href = link.respond_to?(:href) ? link.href : (link['href'] || link['src']) get href, [], referer end end ## # GETs +uri+ and writes it to +io_or_filename+ without recording the request # in the history. If +io_or_filename+ does not respond to #write it will be # used as a file name. +parameters+, +referer+ and +headers+ are used as in # #get. # # By default, if the Content-type of the response matches a Mechanize::File # or Mechanize::Page parser, the response body will be loaded into memory # before being saved. See #pluggable_parser for details on changing this # default. 
# # For alternate ways of downloading files see Mechanize::FileSaver and # Mechanize::DirectorySaver. def download uri, io_or_filename, parameters = [], referer = nil, headers = {} page = transact do get uri, parameters, referer, headers end io = if io_or_filename.respond_to? :write then io_or_filename else open io_or_filename, 'wb' end case page when Mechanize::File then io.write page.body else body_io = page.body_io until body_io.eof? do io.write body_io.read 16384 end end page ensure io.close if io and not io_or_filename.respond_to? :write end ## # DELETE +uri+ with +query_params+, and setting +headers+: # # +query_params+ is formatted into a query string using # Mechanize::Util.build_query_string, which see. # # delete('http://example/', {'q' => 'foo'}, {}) def delete(uri, query_params = {}, headers = {}) page = @agent.fetch(uri, :delete, headers, query_params) add_to_history(page) page end ## # GET the +uri+ with the given request +parameters+, +referer+ and # +headers+. # # The +referer+ may be a URI or a page. # # +parameters+ is formatted into a query string using # Mechanize::Util.build_query_string, which see. def get(uri, parameters = [], referer = nil, headers = {}) method = :get referer ||= if uri.to_s =~ %r{\Ahttps?://} Page.new else current_page || Page.new end # FIXME: Huge hack so that using a URI as a referer works. I need to # refactor everything to pass around URIs but still support # Mechanize::Page#base unless Mechanize::Parser === referer then referer = if referer.is_a?(String) then Page.new URI(referer) else Page.new referer end end # fetch the page headers ||= {} page = @agent.fetch uri, method, headers, parameters, referer add_to_history(page) yield page if block_given? page end ## # GET +url+ and return only its contents def get_file(url) get(url).body end ## # HEAD +uri+ with +query_params+ and +headers+: # # +query_params+ is formatted into a query string using # Mechanize::Util.build_query_string, which see. 
# # head('http://example/', {'q' => 'foo'}, {}) def head(uri, query_params = {}, headers = {}) page = @agent.fetch uri, :head, headers, query_params yield page if block_given? page end ## # POST to the given +uri+ with the given +query+. # # +query+ is processed using Mechanize::Util.each_parameter (which # see), and then encoded into an entity body. If any IO/FileUpload # object is specified as a field value the "enctype" will be # multipart/form-data, or application/x-www-form-urlencoded # otherwise. # # Examples: # agent.post 'http://example.com/', "foo" => "bar" # # agent.post 'http://example.com/', [%w[foo bar]] # # agent.post('http://example.com/', "hello", # 'Content-Type' => 'application/xml') def post(uri, query = {}, headers = {}) return request_with_entity(:post, uri, query, headers) if String === query node = {} # Create a fake form class << node def search(*args); []; end end node['method'] = 'POST' node['enctype'] = 'application/x-www-form-urlencoded' form = Form.new(node) Mechanize::Util.each_parameter(query) { |k, v| if v.is_a?(IO) form.enctype = 'multipart/form-data' ul = Form::FileUpload.new({'name' => k.to_s},::File.basename(v.path)) ul.file_data = v.read form.file_uploads << ul elsif v.is_a?(Form::FileUpload) form.enctype = 'multipart/form-data' form.file_uploads << v else form.fields << Form::Field.new({'name' => k.to_s},v) end } post_form(uri, form, headers) end ## # PUT to +uri+ with +entity+, and setting +headers+: # # put('http://example/', 'new content', {'Content-Type' => 'text/plain'}) def put(uri, entity, headers = {}) request_with_entity(:put, uri, entity, headers) end ## # Makes an HTTP request to +url+ using HTTP method +verb+. +entity+ is used # as the request body, if allowed. 
def request_with_entity(verb, uri, entity, headers = {})
  cur_page = current_page || Page.new

  log.debug("query: #{ entity.inspect }") if log

  # Content-Length counts bytes on the wire.  String#size counts characters,
  # which is too small for multibyte text, so prefer bytesize when the entity
  # offers it and fall back to size otherwise.
  entity_length = entity.respond_to?(:bytesize) ? entity.bytesize : entity.size

  # Caller-supplied headers override these defaults.
  headers = {
    'Content-Type'   => 'application/octet-stream',
    'Content-Length' => entity_length.to_s,
  }.update headers

  page = @agent.fetch uri, verb, headers, [entity], cur_page
  add_to_history(page)
  page
end

##
# Submits +form+ with an optional +button+.
#
# Without a button:
#
#   page = agent.get('http://example.com')
#   agent.submit(page.forms.first)
#
# With a button:
#
#   agent.submit(page.forms.first, page.forms.first.buttons.first)
#
# Raises ArgumentError when the form's method is neither POST nor GET.
def submit(form, button = nil, headers = {})
  form.add_button_to_query(button) if button

  case form.method.upcase
  when 'POST'
    post_form(form.action, form, headers)
  when 'GET'
    # Drop any query string already on the action; the form's own query
    # replaces it.
    get(form.action.gsub(/\?[^\?]*$/, ''),
        form.build_query,
        form.page,
        headers)
  else
    raise ArgumentError, "unsupported method: #{form.method.upcase}"
  end
end

##
# Runs given block, then resets the page history as it was before. self is
# given as a parameter to the block.  Returns the value of the block.
def transact
  history_backup = @agent.history.dup
  begin
    yield self
  ensure
    @agent.history = history_backup
  end
end

# :section: Settings
#
# Settings that adjust how mechanize makes HTTP requests including timeouts,
# keep-alives, compression, redirects and headers.

@html_parser = Nokogiri::HTML
@log = nil

class << self

  ##
  # Default HTML parser for all mechanize instances
  #
  #   Mechanize.html_parser = Nokogiri::XML

  attr_accessor :html_parser

  ##
  # Default logger for all mechanize instances
  #
  #   Mechanize.log = Logger.new $stderr

  attr_accessor :log

end

##
# A default encoding name used when parsing HTML.  When set it is
# used after any other encoding.  The default is nil.

attr_accessor :default_encoding

##
# Overrides the encodings given by the HTTP server and the HTML page with
# the default_encoding when set to true.
attr_accessor :force_default_encoding ## # The HTML parser to be used when parsing documents attr_accessor :html_parser ## # HTTP/1.0 keep-alive time. This is no longer supported by mechanize as it # now uses net-http-persistent which only supports HTTP/1.1 persistent # connections attr_accessor :keep_alive_time ## # The pluggable parser maps a response Content-Type to a parser class. The # registered Content-Type may be either a full content type like 'image/png' # or a media type 'text'. See Mechanize::PluggableParser for further # details. # # Example: # # agent.pluggable_parser['application/octet-stream'] = Mechanize::Download attr_reader :pluggable_parser ## # The HTTP proxy address attr_reader :proxy_addr ## # The HTTP proxy password attr_reader :proxy_pass ## # The HTTP proxy port attr_reader :proxy_port ## # The HTTP proxy username attr_reader :proxy_user ## # *NOTE*: These credentials will be used as a default for any challenge # exposing your password to disclosure to malicious servers. Use of this # method will warn. This method is deprecated and will be removed in # mechanize 3. # # Sets the +user+ and +password+ as the default credentials to be used for # HTTP authentication for any server. The +domain+ is used for NTLM # authentication. def auth user, password, domain = nil caller.first =~ /(.*?):(\d+).*?$/ warn <<-WARNING At #{$1} line #{$2} Use of #auth and #basic_auth are deprecated due to a security vulnerability. WARNING @agent.add_default_auth user, password, domain end alias basic_auth auth ## # Adds credentials +user+, +pass+ for +uri+. If +realm+ is set the # credentials are used only for that realm. If +realm+ is not set the # credentials become the default for any realm on that URI. # # +domain+ and +realm+ are exclusive as NTLM does not follow RFC 2617. If # +domain+ is given it is only used for NTLM authentication. 
def add_auth uri, user, password, realm = nil, domain = nil
  @agent.add_auth uri, user, password, realm, domain
end

##
# Are If-Modified-Since conditional requests enabled?

def conditional_requests
  @agent.conditional_requests
end

##
# Enables or disables If-Modified-Since conditional requests according to
# +enabled+ (enabled by default)

def conditional_requests= enabled
  @agent.conditional_requests = enabled
end

##
# A Mechanize::CookieJar which stores cookies

def cookie_jar
  @agent.cookie_jar
end

##
# Replaces the cookie jar with +cookie_jar+

def cookie_jar= cookie_jar
  @agent.cookie_jar = cookie_jar
end

##
# Returns a list of cookies stored in the cookie jar.

def cookies
  @agent.cookie_jar.to_a
end

##
# Follow HTML meta refresh and HTTP Refresh headers.  If set to +:anywhere+
# meta refresh tags outside of the head element will be followed.

def follow_meta_refresh
  @agent.follow_meta_refresh
end

##
# Controls following of HTML meta refresh and HTTP Refresh headers in
# responses.

def follow_meta_refresh= follow
  @agent.follow_meta_refresh = follow
end

##
# Follow an HTML meta refresh and HTTP Refresh headers that have no "url="
# in the content attribute.
#
# Defaults to false to prevent infinite refresh loops.

def follow_meta_refresh_self
  @agent.follow_meta_refresh_self
end

##
# Alters the following of HTML meta refresh and HTTP Refresh headers that
# point to the same page.

def follow_meta_refresh_self= follow
  @agent.follow_meta_refresh_self = follow
end

##
# Is gzip compression of responses enabled?

def gzip_enabled
  @agent.gzip_enabled
end

##
# Enables or disables HTTP/1.1 gzip compression according to +enabled+
# (enabled by default)

def gzip_enabled=enabled
  @agent.gzip_enabled = enabled
end

##
# Connections that have not been used in this many seconds will be reset.

def idle_timeout
  @agent.idle_timeout
end

##
# Sets the idle timeout to +idle_timeout+.  The default timeout is 5
# seconds.  If you experience "too many connection resets", reducing this
# value may help.
def idle_timeout= idle_timeout
  @agent.idle_timeout = idle_timeout
end

##
# When set to true mechanize will ignore an EOF during chunked transfer
# encoding so long as at least one byte was received.  Be careful when
# enabling this as it may cause data loss.
#
# Net::HTTP does not inform mechanize of where in the chunked stream the EOF
# occurred.  Usually it is after the last-chunk but before the terminating
# CRLF (invalid termination) but it may occur earlier.  In the second case
# your response body may be incomplete.

def ignore_bad_chunking
  @agent.ignore_bad_chunking
end

##
# When set to true mechanize will ignore an EOF during chunked transfer
# encoding.  See ignore_bad_chunking for further details

def ignore_bad_chunking= ignore_bad_chunking
  @agent.ignore_bad_chunking = ignore_bad_chunking
end

##
# Are HTTP/1.1 keep-alive connections enabled?

def keep_alive
  @agent.keep_alive
end

##
# Disable HTTP/1.1 keep-alive connections if +enable+ is set to false.  If
# you are experiencing "too many connection resets" errors setting this to
# false will eliminate them.
#
# You should first investigate reducing idle_timeout.

def keep_alive= enable
  @agent.keep_alive = enable
end

##
# The current logger.  If no logger has been set on this instance
# Mechanize.log is used.

def log
  @log || Mechanize.log
end

##
# Sets the +logger+ used by this instance of mechanize

def log= logger
  @log = logger
end

##
# Responses larger than this will be written to a Tempfile instead of stored
# in memory.  The default is 100,000 bytes.
#
# A value of nil disables creation of Tempfiles.

def max_file_buffer
  @agent.max_file_buffer
end

##
# Sets the maximum size of a response body that will be stored in memory to
# +bytes+.  A value of nil causes all response bodies to be stored in
# memory.
# # Note that for Mechanize::Download subclasses, the maximum buffer size # multiplied by the number of pages stored in history (controlled by # #max_history) is an approximate upper limit on the amount of memory # Mechanize will use. By default, Mechanize can use up to ~5MB to store # response bodies for non-File and non-Page (HTML) responses. # # See also the discussion under #max_history= def max_file_buffer= bytes @agent.max_file_buffer = bytes end ## # Length of time to wait until a connection is opened in seconds def open_timeout @agent.open_timeout end ## # Sets the connection open timeout to +open_timeout+ def open_timeout= open_timeout @agent.open_timeout = open_timeout end ## # Length of time to wait for data from the server def read_timeout @agent.read_timeout end ## # Sets the timeout for each chunk of data read from the server to # +read_timeout+. A single request may read many chunks of data. def read_timeout= read_timeout @agent.read_timeout = read_timeout end ## # Controls how mechanize deals with redirects. The following values are # allowed: # # :all, true:: All 3xx redirects are followed (default) # :permanent:: Only 301 Moved Permanantly redirects are followed # false:: No redirects are followed def redirect_ok @agent.redirect_ok end alias follow_redirect? redirect_ok ## # Sets the mechanize redirect handling policy. 
# See redirect_ok for allowed
# values

def redirect_ok= follow
  @agent.redirect_ok = follow
end

alias follow_redirect= redirect_ok=

##
# Maximum number of redirections to follow

def redirection_limit
  @agent.redirection_limit
end

##
# Sets the maximum number of redirections to follow to +limit+

def redirection_limit= limit
  @agent.redirection_limit = limit
end

##
# Resolve the full path of a link / uri

def resolve link
  @agent.resolve link
end

##
# A hash of custom request headers that will be sent on every request

def request_headers
  @agent.request_headers
end

##
# Replaces the custom request headers that will be sent on every request
# with +request_headers+

def request_headers= request_headers
  @agent.request_headers = request_headers
end

##
# Retry POST and other non-idempotent requests.  See RFC 2616 9.1.2.

def retry_change_requests
  @agent.retry_change_requests
end

##
# When setting +retry_change_requests+ to true you are stating that, for all
# the URLs you access with mechanize, making POST and other non-idempotent
# requests is safe and will not cause data duplication or other harmful
# results.
#
# If you are experiencing "too many connection resets" errors you should
# instead investigate reducing the idle_timeout or disabling keep_alive
# connections.

def retry_change_requests= retry_change_requests
  @agent.retry_change_requests = retry_change_requests
end

##
# Will /robots.txt files be obeyed?

def robots
  @agent.robots
end

##
# When +enabled+ mechanize will retrieve and obey <tt>robots.txt</tt>
# files

def robots= enabled
  @agent.robots = enabled
end

##
# The handlers for HTTP and other URI protocols.

def scheme_handlers
  @agent.scheme_handlers
end

##
# Replaces the URI scheme handler table with +scheme_handlers+

def scheme_handlers= scheme_handlers
  @agent.scheme_handlers = scheme_handlers
end

##
# The identification string for the client initiating a web request

def user_agent
  @agent.user_agent
end

##
# Sets the User-Agent used by mechanize to +user_agent+.
See also # user_agent_alias def user_agent= user_agent @agent.user_agent = user_agent end ## # Set the user agent for the Mechanize object based on the given +name+. # # See also AGENT_ALIASES def user_agent_alias= name self.user_agent = AGENT_ALIASES[name] || raise(ArgumentError, "unknown agent alias #{name.inspect}") end ## # The value of watch_for_set is passed to pluggable parsers for retrieved # content attr_accessor :watch_for_set # :section: SSL # # SSL settings for mechanize. These must be set in the block given to # Mechanize.new ## # Path to an OpenSSL server certificate file def ca_file @agent.ca_file end ## # Sets the certificate file used for SSL connections def ca_file= ca_file @agent.ca_file = ca_file end ## # An OpenSSL client certificate or the path to a certificate file. def cert @agent.certificate end ## # Sets the OpenSSL client certificate +cert+ to the given path or # certificate instance def cert= cert @agent.certificate = cert end ## # An OpenSSL certificate store for verifying server certificates. This # defaults to the default certificate store for your system. # # If your system does not ship with a default set of certificates you can # retrieve a copy of the set from Mozilla here: # http://curl.haxx.se/docs/caextract.html # # (Note that this set does not have an HTTPS download option so you may # wish to use the firefox-db2pem.sh script to extract the certificates # from a local install to avoid man-in-the-middle attacks.) # # After downloading or generating a cacert.pem from the above link you # can create a certificate store from the pem file like this: # # cert_store = OpenSSL::X509::Store.new # cert_store.add_file 'cacert.pem' # # And have mechanize use it with: # # agent.cert_store = cert_store def cert_store @agent.cert_store end ## # Sets the OpenSSL certificate store to +store+. # # See also #cert_store def cert_store= cert_store @agent.cert_store = cert_store end ## # What is this? # # Why is it different from #cert? 
def certificate # :nodoc:
  # Same delegate as #cert: both read @agent.certificate.
  @agent.certificate
end

##
# An OpenSSL private key or the path to a private key

def key
  @agent.private_key
end

##
# Sets the OpenSSL client +key+ to the given path or key instance.  If a
# path is given, the path must contain an RSA key file.

def key= key
  @agent.private_key = key
end

##
# OpenSSL client key password

def pass
  @agent.pass
end

##
# Sets the client key password to +pass+

def pass= pass
  @agent.pass = pass
end

##
# SSL version to use.

def ssl_version
  @agent.ssl_version
end

##
# Sets the SSL version to use to +ssl_version+ without client/server
# negotiation.

def ssl_version= ssl_version
  @agent.ssl_version = ssl_version
end

##
# A callback for additional certificate verification.  See
# OpenSSL::SSL::SSLContext#verify_callback
#
# The callback can be used for debugging or to ignore errors by always
# returning +true+.  Specifying nil uses the default method that was valid
# when the SSLContext was created

def verify_callback
  @agent.verify_callback
end

##
# Sets the OpenSSL certificate verification callback

def verify_callback= verify_callback
  @agent.verify_callback = verify_callback
end

##
# The OpenSSL server certificate verification method.  The default is
# OpenSSL::SSL::VERIFY_PEER and certificate verification uses the default
# system certificates.  See also cert_store

def verify_mode
  @agent.verify_mode
end

##
# Sets the OpenSSL server certificate verification method.

def verify_mode= verify_mode
  @agent.verify_mode = verify_mode
end

# :section: Utilities

attr_reader :agent # :nodoc:

##
# Parses the +body+ of the +response+ from +uri+ using the pluggable parser
# that matches its content type
#
# The content type is the Content-Type header with everything from the
# first ";" or "," dropped, lower-cased; it is nil when the header is
# absent.  Returns the new parser instance.

def parse uri, response, body
  content_type = nil

  unless response['Content-Type'].nil?
    # Keep only the media type: strip parameters after ";" ...
    data, = response['Content-Type'].split ';', 2
    # ... and anything after a "," in the remainder.
    content_type, = data.downcase.split ',', 2 unless data.nil?
  end

  parser_klass = @pluggable_parser.parser content_type

  # Mechanize::Download and its subclasses receive the body untouched;
  # every other parser gets IO-like bodies read into a value first.
  unless parser_klass <= Mechanize::Download then
    body = case body
           when IO, Tempfile, StringIO then
             body.read
           else
             body
           end
  end

  parser_klass.new uri, response, body, response.code do |parser|
    parser.mech = self if parser.respond_to? :mech=

    parser.watch_for_set = @watch_for_set if @watch_for_set and parser.respond_to?(:watch_for_set=)
  end
end

def pretty_print(q) # :nodoc:
  q.object_group(self) {
    q.breakable
    q.pp cookie_jar
    q.breakable
    q.pp current_page
  }
end

##
# Sets the proxy +address+ at +port+ with an optional +user+ and +password+
#
# The values are also kept on this object for the proxy_* readers.

def set_proxy address, port, user = nil, password = nil
  @proxy_addr = address
  @proxy_port = port
  @proxy_user = user
  @proxy_pass = password

  @agent.set_proxy address, port, user, password
end

##
# Clears history and cookies.

def reset
  @agent.reset
end

##
# Shuts down this session by clearing browsing state and closing all
# persistent connections.

def shutdown
  reset
  @agent.shutdown
end

private

##
# Posts +form+ to +uri+
#
# The referring page is the form's page, else the current page, else a
# blank page.  The fetched page is added to the history and returned.

def post_form(uri, form, headers = {})
  cur_page = form.page || current_page || Page.new

  request_data = form.request_data

  log.debug("query: #{ request_data.inspect }") if log

  # Caller-supplied headers override these defaults.
  headers = {
    'Content-Type'    => form.enctype,
    'Content-Length'  => request_data.size.to_s,
  }.merge headers

  # fetch the page
  page = @agent.fetch uri, :post, headers, [request_data], cur_page
  add_to_history(page)
  page
end

##
# Adds +page+ to the history
#
# Calls @history_added with the page afterwards when it is set.

def add_to_history(page)
  @agent.history.push(page, @agent.resolve(page.uri))
  @history_added.call(page) if @history_added
end

end

require 'mechanize/element_not_found_error'
require 'mechanize/response_read_error'
require 'mechanize/chunked_termination_error'
require 'mechanize/content_type_error'
require 'mechanize/cookie'
require 'mechanize/cookie_jar'
require 'mechanize/parser'
require 'mechanize/download'
require 'mechanize/directory_saver'
require 'mechanize/file'
require 'mechanize/file_connection'
require 'mechanize/file_request'
# NOTE(review): the argument of this require sits at the start of the
# following line of this file.
require
'mechanize/file_response' require 'mechanize/form' require 'mechanize/history' require 'mechanize/http' require 'mechanize/http/agent' require 'mechanize/http/auth_challenge' require 'mechanize/http/auth_realm' require 'mechanize/http/content_disposition_parser' require 'mechanize/http/www_authenticate_parser' require 'mechanize/image' require 'mechanize/page' require 'mechanize/pluggable_parsers' require 'mechanize/redirect_limit_reached_error' require 'mechanize/redirect_not_get_or_head_error' require 'mechanize/response_code_error' require 'mechanize/robots_disallowed_error' require 'mechanize/unauthorized_error' require 'mechanize/unsupported_scheme_error' require 'mechanize/util' mechanize-2.7.5/mechanize.gemspec0000644000004100000410000000444312772546476017061 0ustar www-datawww-data# coding: utf-8 lib = File.expand_path('../lib', __FILE__) $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib) require 'mechanize/version' Gem::Specification.new do |spec| spec.name = "mechanize" spec.version = Mechanize::VERSION spec.homepage = "http://docs.seattlerb.org/mechanize/" spec.summary = %q{The Mechanize library is used for automating interaction with websites} spec.description = [ "The Mechanize library is used for automating interaction with websites.", "Mechanize automatically stores and sends cookies, follows redirects,", "and can follow links and submit forms. Form fields can be populated and", "submitted. Mechanize also keeps track of the sites that you have visited as", "a history." 
].join("\n") spec.authors = [ 'Eric Hodel', 'Aaron Patterson', 'Mike Dalessio', 'Akinori MUSHA', 'Lee Jarvis' ] spec.email = [ 'drbrain@segment7.net', 'aaronp@rubyforge.org', 'mike.dalessio@gmail.com', 'knu@idaemons.org', 'ljjarvis@gmail.com' ] spec.license = "MIT" spec.require_paths = ["lib"] spec.files = `git ls-files`.split($/) spec.test_files = spec.files.grep(%r{^test/}) spec.extra_rdoc_files += Dir['*.rdoc'] spec.rdoc_options = ["--main", "README.rdoc"] spec.required_ruby_version = ">= 1.9.2" spec.add_runtime_dependency "net-http-digest_auth", [ ">= 1.1.1", "~> 1.1" ] spec.add_runtime_dependency "net-http-persistent", [ ">= 2.5.2", "~> 2.5" ] if RUBY_VERSION >= "2.0" spec.add_runtime_dependency "mime-types", [ ">= 1.17.2" ] else spec.add_runtime_dependency "mime-types", [ ">= 1.17.2", "< 3" ] end spec.add_runtime_dependency "http-cookie", [ "~> 1.0" ] spec.add_runtime_dependency "nokogiri", [ "~> 1.6" ] spec.add_runtime_dependency "ntlm-http", [ ">= 0.1.1", "~> 0.1" ] spec.add_runtime_dependency "webrobots", [ "< 0.2", ">= 0.0.9" ] spec.add_runtime_dependency "domain_name", [ ">= 0.5.1", "~> 0.5" ] spec.add_development_dependency "rake" spec.add_development_dependency "bundler", "~> 1.3" spec.add_development_dependency "rdoc", "~> 4.0" spec.add_development_dependency "minitest", "~> 5.0" end mechanize-2.7.5/test/0000755000004100000410000000000012772546476014523 5ustar www-datawww-datamechanize-2.7.5/test/data/0000755000004100000410000000000012772546476015434 5ustar www-datawww-datamechanize-2.7.5/test/data/server.pem0000644000004100000410000000156712772546476017456 0ustar www-datawww-data-----BEGIN RSA PRIVATE KEY----- MIICXAIBAAKBgQCqZ5iOGOLQc90ibB0dvEKFK+yGMZKmw/Ko6oCDdC1zrJchcohs zSnGuS59gLvAmS8groLf77rY31fhKtG5dB8GynaOh4z+kcZhl+hCll+zTq7KH/rP eg4S5iWllm7b6j/HssvTzSJyo1+p/+8LFXrULrY4Tcv3AJK4elDrI8ghrwIDAQAB AoGAC+iZfLS4hSDTv2gW0NErROtA6E/mk8j12GArAwTHeGIDXc8HQbNEzCJ84UBx 3o/V/06yzruOL0HMfmvjpDY9RLsH02xZb2F/lruw4MJLu50i/Zu8Sjmb1YPSfCh/ 
3+8lREA3Uznlq+wHC3yPxQzMBy5jaEdH4IKxT0Bq8TeF0AECQQDSpL47YpRVRsLn sS00ndEgQQmT5AJWJJtPpbHk6AA0a+zdNeuDRbdF42zG483YEqU7meZbPKR8QbkK ZQPEBuevAkEAzxjGcz6NZesmN/NQOtOpylewEs1bdIJyBIBmcnmkimLBtdxd0t34 wUKVHLDSj2aemuAHHwsyn/BNXs6F+obmAQJBALpbkAXAAFW1xefvo3vih8sOXyfd WIfX2SRNBqbq7otyVFudQaChBDUrsOgBUPLyBAdH8DoV27wm9UuR9RPvu/cCQFRr WgICXqtMFtE56tuACreD1S9k7MHqpsW0/Y3ujicnKKWUhd5+Q3esR5JhdgOkpkSl y+FYtDNERpW+BBliwgECQA+Vc7pnxwDIOP8kFumdAUmRmhEZjuwArFcywPzrCUn9 4/KBOp5wDN7kanBwNGZCZ/eQtkb6thAS8C9pufHD1lw= -----END RSA PRIVATE KEY----- mechanize-2.7.5/test/data/server.crt0000644000004100000410000000170412772546476017456 0ustar www-datawww-data-----BEGIN CERTIFICATE----- MIICmzCCAgQCCQDq2kM3TCIM0DANBgkqhkiG9w0BAQQFADCBkTELMAkGA1UEBhMC VVMxEzARBgNVBAgTCldhc2hpbmd0b24xEDAOBgNVBAcTB1NlYXR0bGUxEjAQBgNV BAoTCU1lY2hhbml6ZTESMBAGA1UECxMJTWVjaGFuaXplMQ4wDAYDVQQDEwVBYXJv bjEjMCEGCSqGSIb3DQEJARYUYWFyb25wQHJ1Ynlmb3JnZS5vcmcwHhcNMDYwODIz MDU0NTMwWhcNMDcwODIzMDU0NTMwWjCBkTELMAkGA1UEBhMCVVMxEzARBgNVBAgT Cldhc2hpbmd0b24xEDAOBgNVBAcTB1NlYXR0bGUxEjAQBgNVBAoTCU1lY2hhbml6 ZTESMBAGA1UECxMJTWVjaGFuaXplMQ4wDAYDVQQDEwVBYXJvbjEjMCEGCSqGSIb3 DQEJARYUYWFyb25wQHJ1Ynlmb3JnZS5vcmcwgZ8wDQYJKoZIhvcNAQEBBQADgY0A MIGJAoGBAKpnmI4Y4tBz3SJsHR28QoUr7IYxkqbD8qjqgIN0LXOslyFyiGzNKca5 Ln2Au8CZLyCugt/vutjfV+Eq0bl0HwbKdo6HjP6RxmGX6EKWX7NOrsof+s96DhLm JaWWbtvqP8eyy9PNInKjX6n/7wsVetQutjhNy/cAkrh6UOsjyCGvAgMBAAEwDQYJ KoZIhvcNAQEEBQADgYEAGtqgxn1fh0X5MxDG1yMp5aGcZ6HhtEtlm5S0ZsRnMsqU Hh6Bd57+zUQ66XnLCbQN2cwNeeSoqtI16Ccc1I5cAhQnIZESMsPG21i1BnpEhKph HfNFNpWI/upT2EXNUM6Vx2Kk2aCw2ysrD2pHpsTo5bCOly00uK1ZkoJVQMTL4gU= -----END CERTIFICATE----- mechanize-2.7.5/test/data/server.key0000644000004100000410000000156712772546476017465 0ustar www-datawww-data-----BEGIN RSA PRIVATE KEY----- MIICXAIBAAKBgQCqZ5iOGOLQc90ibB0dvEKFK+yGMZKmw/Ko6oCDdC1zrJchcohs zSnGuS59gLvAmS8groLf77rY31fhKtG5dB8GynaOh4z+kcZhl+hCll+zTq7KH/rP eg4S5iWllm7b6j/HssvTzSJyo1+p/+8LFXrULrY4Tcv3AJK4elDrI8ghrwIDAQAB 
AoGAC+iZfLS4hSDTv2gW0NErROtA6E/mk8j12GArAwTHeGIDXc8HQbNEzCJ84UBx 3o/V/06yzruOL0HMfmvjpDY9RLsH02xZb2F/lruw4MJLu50i/Zu8Sjmb1YPSfCh/ 3+8lREA3Uznlq+wHC3yPxQzMBy5jaEdH4IKxT0Bq8TeF0AECQQDSpL47YpRVRsLn sS00ndEgQQmT5AJWJJtPpbHk6AA0a+zdNeuDRbdF42zG483YEqU7meZbPKR8QbkK ZQPEBuevAkEAzxjGcz6NZesmN/NQOtOpylewEs1bdIJyBIBmcnmkimLBtdxd0t34 wUKVHLDSj2aemuAHHwsyn/BNXs6F+obmAQJBALpbkAXAAFW1xefvo3vih8sOXyfd WIfX2SRNBqbq7otyVFudQaChBDUrsOgBUPLyBAdH8DoV27wm9UuR9RPvu/cCQFRr WgICXqtMFtE56tuACreD1S9k7MHqpsW0/Y3ujicnKKWUhd5+Q3esR5JhdgOkpkSl y+FYtDNERpW+BBliwgECQA+Vc7pnxwDIOP8kFumdAUmRmhEZjuwArFcywPzrCUn9 4/KBOp5wDN7kanBwNGZCZ/eQtkb6thAS8C9pufHD1lw= -----END RSA PRIVATE KEY----- mechanize-2.7.5/test/data/htpasswd0000644000004100000410000000002312772546476017207 0ustar www-datawww-datamech:44E/qORekFV0E mechanize-2.7.5/test/data/server.csr0000644000004100000410000000130412772546476017451 0ustar www-datawww-data-----BEGIN CERTIFICATE REQUEST----- MIIB0jCCATsCAQAwgZExCzAJBgNVBAYTAlVTMRMwEQYDVQQIEwpXYXNoaW5ndG9u MRAwDgYDVQQHEwdTZWF0dGxlMRIwEAYDVQQKEwlNZWNoYW5pemUxEjAQBgNVBAsT CU1lY2hhbml6ZTEOMAwGA1UEAxMFQWFyb24xIzAhBgkqhkiG9w0BCQEWFGFhcm9u cEBydWJ5Zm9yZ2Uub3JnMIGfMA0GCSqGSIb3DQEBAQUAA4GNADCBiQKBgQCqZ5iO GOLQc90ibB0dvEKFK+yGMZKmw/Ko6oCDdC1zrJchcohszSnGuS59gLvAmS8groLf 77rY31fhKtG5dB8GynaOh4z+kcZhl+hCll+zTq7KH/rPeg4S5iWllm7b6j/HssvT zSJyo1+p/+8LFXrULrY4Tcv3AJK4elDrI8ghrwIDAQABoAAwDQYJKoZIhvcNAQEE BQADgYEAT7SPe71NQvT2BYGEmbWb7FlSQrPh+rDQMHt/Akb8+r91NLkxZtbD1e/F iyI9JloPCEwJXxHBl0VVRpFCRuJNN0z0E/G4NUWu6n+ZkihtnmV6uazzAQmD4pTl SjoiyVLWU+r4Q4yXWXtJ9GR8Attv32fL3PcP+GGLeurXJAn0MNU= -----END CERTIFICATE REQUEST----- mechanize-2.7.5/test/test_mechanize_page_frame.rb0000644000004100000410000000056712772546476022230 0ustar www-datawww-datarequire 'mechanize/test_case' class TestMechanizePageFrame < Mechanize::TestCase def test_content page = page 'http://example/referer' frame = node 'frame', 'name' => 'frame1', 'src' => 'http://example/' frame = Mechanize::Page::Frame.new frame, @mech, page frame.content 
assert_equal 'http://example/referer', requests.first['Referer'] end end mechanize-2.7.5/test/test_mechanize_form_check_box.rb0000644000004100000410000000133512772546476023104 0ustar www-datawww-datarequire 'mechanize/test_case' class TestMechanizeFormCheckBox < Mechanize::TestCase def setup super @page = @mech.get('http://localhost/tc_checkboxes.html') end def test_check form = @page.forms.first form.checkbox_with(:name => 'green').check assert(form.checkbox_with(:name => 'green').checked) %w{ red blue yellow brown }.each do |color| assert_equal(false, form.checkbox_with(:name => color).checked) end end def test_uncheck form = @page.forms.first checkbox = form.checkbox_with(:name => 'green') checkbox.check assert form.checkbox_with(:name => 'green').checked checkbox.uncheck assert !form.checkbox_with(:name => 'green').checked end end mechanize-2.7.5/test/test_mechanize_form_textarea.rb0000644000004100000410000000304712772546476022776 0ustar www-datawww-datarequire 'mechanize/test_case' class TestMechanizeFormTextarea < Mechanize::TestCase def setup super @page = @mech.get("http://localhost/tc_textarea.html") end def test_empty_text_area form = @page.forms_with(:name => 'form1').first assert_equal('', form.field_with(:name => 'text1').value) form.text1 = 'Hello World' assert_equal('Hello World', form.field_with(:name => 'text1').value) page = @mech.submit(form) assert_equal(1, page.links.length) assert_equal('text1:Hello World', page.links[0].text) end def test_non_empty_textfield form = @page.forms_with(:name => 'form2').first assert_equal('sample text', form.field_with(:name => 'text1').value) page = @mech.submit(form) assert_equal(1, page.links.length) assert_equal('text1:sample text', page.links[0].text) end def test_multi_textfield form = @page.form_with(:name => 'form3') assert_equal(2, form.fields_with(:name => 'text1').length) assert_equal('', form.fields_with(:name => 'text1')[0].value) assert_equal('sample text', form.fields_with(:name => 
'text1')[1].value) form.text1 = 'Hello World' assert_equal('Hello World', form.fields_with(:name => 'text1')[0].value) assert_equal('sample text', form.fields_with(:name => 'text1')[1].value) page = @mech.submit(form) assert_equal(2, page.links.length) link = page.links_with(:text => 'text1:sample text') assert_equal(1, link.length) link = page.links_with(:text => 'text1:Hello World') assert_equal(1, link.length) end end mechanize-2.7.5/test/test_mechanize_file_connection.rb0000644000004100000410000000061012772546476023265 0ustar www-datawww-datarequire 'mechanize/test_case' class TestMechanizeFileConnection < Mechanize::TestCase def test_request uri = URI.parse "file://#{File.expand_path __FILE__}" conn = Mechanize::FileConnection.new body = '' conn.request uri, nil do |response| response.read_body do |part| body << part end end assert_equal File.read(__FILE__), body end end mechanize-2.7.5/test/test_mechanize_history.rb0000644000004100000410000000322612772546476021636 0ustar www-datawww-datarequire 'mechanize/test_case' class TestMechanizeHistory < Mechanize::TestCase def setup super @uri = URI 'http://example/' @uri2 = @uri + '/a' @history = Mechanize::History.new end def test_initialize assert_empty @history end def test_clear @history.push :page, @uri @history.clear assert_empty @history end def test_pop assert_nil @history.pop @history.push :page1, @uri @history.push :page2, @uri2 assert_equal :page2, @history.pop refute_empty @history end def test_push p1 = page @uri obj = @history.push p1 assert_same @history, obj assert_equal 1, @history.length p2 = page @uri2 @history.push p2 assert_equal 2, @history.length end def test_push_max_size @history = Mechanize::History.new 2 @history.push :page1, @uri assert_equal 1, @history.length @history.push :page2, @uri assert_equal 2, @history.length @history.push :page3, @uri assert_equal 2, @history.length end def test_push_uri obj = @history.push :page, @uri assert_same @history, obj assert_equal 1, @history.length 
@history.push :page2, @uri assert_equal 2, @history.length end def test_shift assert_nil @history.shift @history.push :page1, @uri @history.push :page2, @uri2 page = @history.shift assert_equal :page1, page refute_empty @history @history.shift assert_empty @history end def test_visited_eh refute @history.visited? @uri @history.push page @uri assert @history.visited? URI('http://example') assert @history.visited? URI('http://example/') end end mechanize-2.7.5/test/test_mechanize_http_auth_challenge.rb0000644000004100000410000000254612772546476024143 0ustar www-datawww-datarequire 'mechanize/test_case' class TestMechanizeHttpAuthChallenge < Mechanize::TestCase def setup super @uri = URI 'http://example/' @AR = Mechanize::HTTP::AuthRealm @AC = Mechanize::HTTP::AuthChallenge @challenge = @AC.new 'Digest', { 'realm' => 'r' }, 'Digest realm=r' end def test_realm_basic @challenge.scheme = 'Basic' expected = @AR.new 'Basic', @uri, 'r' assert_equal expected, @challenge.realm(@uri + '/foo') end def test_realm_digest expected = @AR.new 'Digest', @uri, 'r' assert_equal expected, @challenge.realm(@uri + '/foo') end def test_realm_digest_case challenge = @AC.new 'Digest', { 'realm' => 'R' }, 'Digest realm=R' expected = @AR.new 'Digest', @uri, 'R' assert_equal expected, challenge.realm(@uri + '/foo') end def test_realm_unknown @challenge.scheme = 'Unknown' e = assert_raises Mechanize::Error do @challenge.realm(@uri + '/foo') end assert_equal 'unknown HTTP authentication scheme Unknown', e.message end def test_realm_name assert_equal 'r', @challenge.realm_name end def test_realm_name_case challenge = @AC.new 'Digest', { 'realm' => 'R' }, 'Digest realm=R' assert_equal 'R', challenge.realm_name end def test_realm_name_ntlm challenge = @AC.new 'Negotiate, NTLM' assert_nil challenge.realm_name end end mechanize-2.7.5/test/test_mechanize_image.rb0000644000004100000410000000016212772546476021213 0ustar www-datawww-datarequire 'mechanize/test_case' class TestMechanizeImage < 
Mechanize::TestCase # empty subclass, no tests end mechanize-2.7.5/test/test_multi_select.rb0000644000004100000410000001021412772546476020576 0ustar www-datawww-datarequire 'mechanize/test_case' class MultiSelectTest < Mechanize::TestCase def setup super @page = @mech.get("http://localhost/form_multi_select.html") @form = @page.forms.first end def test_option_with o = @form.field_with(:name => 'list').option_with(:value => '1') assert_equal '1', o.value end def test_options_with os = @form.field_with(:name => 'list').options_with(:value => /1|2/) assert_equal ['1', '2'].sort, os.map { |x| x.value }.sort end def test_select_none page = @mech.get("http://localhost/form_multi_select.html") form = page.forms.first form.field_with(:name => 'list').select_none page = @mech.submit(form) assert_equal(0, page.links.length) end def test_select_all page = @mech.get("http://localhost/form_multi_select.html") form = page.forms.first form.field_with(:name => 'list').select_all page = @mech.submit(form) assert_equal(6, page.links.length) assert_equal(1, page.links_with(:text => 'list:1').length) assert_equal(1, page.links_with(:text => 'list:2').length) assert_equal(1, page.links_with(:text => 'list:3').length) assert_equal(1, page.links_with(:text => 'list:4').length) assert_equal(1, page.links_with(:text => 'list:5').length) assert_equal(1, page.links_with(:text => 'list:6').length) end def test_click_all page = @mech.get("http://localhost/form_multi_select.html") form = page.forms.first form.field_with(:name => 'list').options.each { |o| o.click } page = @mech.submit(form) assert_equal(5, page.links.length) assert_equal(1, page.links_with(:text => 'list:1').length) assert_equal(1, page.links_with(:text => 'list:3').length) assert_equal(1, page.links_with(:text => 'list:4').length) assert_equal(1, page.links_with(:text => 'list:5').length) assert_equal(1, page.links_with(:text => 'list:6').length) end def test_select_default page = 
@mech.get("http://localhost/form_multi_select.html") form = page.forms.first page = @mech.submit(form) assert_equal(1, page.links.length) assert_equal(1, page.links_with(:text => 'list:2').length) end def test_select_one page = @mech.get("http://localhost/form_multi_select.html") form = page.forms.first form.list = 'Aaron' assert_equal(['Aaron'], form.list) page = @mech.submit(form) assert_equal(1, page.links.length) assert_equal('list:Aaron', page.links.first.text) end def test_select_two page = @mech.get("http://localhost/form_multi_select.html") form = page.forms.first form.list = ['1', 'Aaron'] page = @mech.submit(form) assert_equal(2, page.links.length) assert_equal(1, page.links_with(:text => 'list:1').length) assert_equal(1, page.links_with(:text => 'list:Aaron').length) end def test_select_three page = @mech.get("http://localhost/form_multi_select.html") form = page.forms.first form.list = ['1', '2', '3'] page = @mech.submit(form) assert_equal(3, page.links.length) assert_equal(1, page.links_with(:text => 'list:1').length) assert_equal(1, page.links_with(:text => 'list:2').length) assert_equal(1, page.links_with(:text => 'list:3').length) end def test_select_three_twice page = @mech.get("http://localhost/form_multi_select.html") form = page.forms.first form.list = ['1', '2', '3'] form.list = ['1', '2', '3'] page = @mech.submit(form) assert_equal(3, page.links.length) assert_equal(1, page.links_with(:text => 'list:1').length) assert_equal(1, page.links_with(:text => 'list:2').length) assert_equal(1, page.links_with(:text => 'list:3').length) end def test_select_with_click page = @mech.get("http://localhost/form_multi_select.html") form = page.forms.first form.list = ['1', 'Aaron'] form.field_with(:name => 'list').options[3].tick assert_equal(['1', 'Aaron', '4'].sort, form.list.sort) page = @mech.submit(form) assert_equal(3, page.links.length) assert_equal(1, page.links_with(:text => 'list:1').length) assert_equal(1, page.links_with(:text => 
'list:Aaron').length) assert_equal(1, page.links_with(:text => 'list:4').length) end end mechanize-2.7.5/test/test_mechanize_form_option.rb0000644000004100000410000000204212772546476022463 0ustar www-datawww-datarequire 'mechanize/test_case' class TestMechanizeFormOption < Mechanize::TestCase def setup super page = html_page <<-BODY
    BODY form = page.forms.first @select = form.fields.first @option1 = @select.options.first @option2 = @select.options.last end def test_inspect assert_match "value: 2", @select.inspect end def test_value_missing_value option = node 'option' option.inner_html = 'blah' option = Mechanize::Form::Option.new option, nil assert_equal 'blah', option.value end def test_click @option1.click assert @option1.selected? end def test_select @option1.select assert @option1.selected? end def test_unselect @option2.unselect refute @option2.selected? end def test_selected_eh refute @option1.selected? assert @option2.selected? end end mechanize-2.7.5/test/test_mechanize_file_request.rb0000644000004100000410000000074612772546476022630 0ustar www-datawww-datarequire 'mechanize/test_case' class TestMechanizeFileRequest < Mechanize::TestCase def setup @uri = URI.parse 'file:///nonexistent' @r = Mechanize::FileRequest.new @uri end def test_initialize assert_equal @uri, @r.uri assert_equal '/nonexistent', @r.path assert_respond_to @r, :[]= assert_respond_to @r, :add_field assert_respond_to @r, :each_header end def test_response_body_permitted_eh assert @r.response_body_permitted? 
end end mechanize-2.7.5/test/test_mechanize_form_encoding.rb0000644000004100000410000000626512772546476022754 0ustar www-datawww-data# coding: utf-8 require 'mechanize/test_case' class TestMechanizeFormEncoding < Mechanize::TestCase # See also: tests of Util.from_native_charset # Encoding test should do with non-utf-8 characters INPUTTED_VALUE = "テスト" # "test" in Japanese UTF-8 encoding CONTENT_ENCODING = 'Shift_JIS' # one of Japanese encoding encoded_value = "\x83\x65\x83\x58\x83\x67".force_encoding(::Encoding::SHIFT_JIS) # "test" in Japanese Shift_JIS encoding EXPECTED_QUERY = "first_name=#{CGI.escape(encoded_value)}&first_name=&gender=&green%5Beggs%5D=" ENCODING_ERRORS = [EncodingError, Encoding::ConverterNotFoundError] # and so on ENCODING_LOG_MESSAGE = /INFO -- : form encoding: Shift_JIS/ INVALID_ENCODING = 'UTF-eight' def set_form_with_encoding(enc) page = @mech.get("http://localhost/form_set_fields.html") form = page.forms.first form.encoding = enc form['first_name'] = INPUTTED_VALUE form end def test_form_encoding_returns_accept_charset page = @mech.get("http://localhost/rails_3_encoding_hack_form_test.html") form = page.forms.first accept_charset = form.form_node['accept-charset'] assert accept_charset assert_equal accept_charset, form.encoding refute_equal page.encoding, form.encoding end def test_form_encoding_returns_page_encoding_when_no_accept_charset page = @mech.get("http://localhost/form_set_fields.html") form = page.forms.first accept_charset = form.form_node['accept-charset'] assert_nil accept_charset refute_equal accept_charset, form.encoding assert_equal page.encoding, form.encoding end def test_form_encoding_equals_sets_new_encoding page = @mech.get("http://localhost/form_set_fields.html") form = page.forms.first refute_equal CONTENT_ENCODING, form.encoding form.encoding = CONTENT_ENCODING assert_equal CONTENT_ENCODING, form.encoding end def test_form_encoding_returns_nil_when_no_page_in_initialize # this sequence is seen at 
Mechanize#post(url, query_hash) node = {} # Create a fake form class << node def search(*args); []; end end node['method'] = 'POST' node['enctype'] = 'application/x-www-form-urlencoded' form = Mechanize::Form.new(node) assert_equal nil, form.encoding end def test_post_form_with_form_encoding form = set_form_with_encoding CONTENT_ENCODING form.submit # we can not use "links.find{|l| l.text == 'key:val'}" assertion here # because the link text encoding is always UTF-8 regaredless of html encoding assert EXPECTED_QUERY, @mech.page.at('div#query').inner_text end def test_post_form_with_problematic_encoding form = set_form_with_encoding INVALID_ENCODING assert_raises(*ENCODING_ERRORS){ form.submit } end def test_form_ignore_encoding_error_is_true form = set_form_with_encoding INVALID_ENCODING form.ignore_encoding_error = true form.submit # HACK no assertions end def test_post_form_logs_form_encoding sio = StringIO.new @mech.log = Logger.new(sio) @mech.log.level = Logger::INFO form = set_form_with_encoding CONTENT_ENCODING form.submit assert_match ENCODING_LOG_MESSAGE, sio.string @mech.log = nil end end mechanize-2.7.5/test/test_mechanize_cookie_jar.rb0000644000004100000410000004274512772546476022253 0ustar www-datawww-datarequire 'mechanize/test_case' class TestMechanizeCookieJar < Mechanize::TestCase def setup super @jar = Mechanize::CookieJar.new end def cookie_values(options = {}) { :name => 'Foo', :value => 'Bar', :path => '/', :expires => Time.now + (10 * 86400), :for_domain => true, :domain => 'rubyforge.org' }.merge(options) end def test_two_cookies_same_domain_and_name_different_paths url = URI 'http://rubyforge.org/' cookie = Mechanize::Cookie.new(cookie_values) @jar.add(url, cookie) @jar.add(url, Mechanize::Cookie.new(cookie_values(:path => '/onetwo'))) assert_equal(1, @jar.cookies(url).length) assert_equal 2, @jar.cookies(URI('http://rubyforge.org/onetwo')).length end def test_domain_case url = URI 'http://rubyforge.org/' # Add one cookie with an expiration 
date in the future cookie = Mechanize::Cookie.new(cookie_values) @jar.add(url, cookie) assert_equal(1, @jar.cookies(url).length) @jar.add(url, Mechanize::Cookie.new( cookie_values(:domain => 'RuByForge.Org', :name => 'aaron'))) assert_equal(2, @jar.cookies(url).length) url2 = URI 'http://RuByFoRgE.oRg/' assert_equal(2, @jar.cookies(url2).length) end def test_host_only url = URI.parse('http://rubyforge.org/') @jar.add(url, Mechanize::Cookie.new( cookie_values(:domain => 'rubyforge.org', :for_domain => false))) assert_equal(1, @jar.cookies(url).length) assert_equal(1, @jar.cookies(URI('http://RubyForge.org/')).length) assert_equal(1, @jar.cookies(URI('https://RubyForge.org/')).length) assert_equal(0, @jar.cookies(URI('http://www.rubyforge.org/')).length) end def test_empty_value values = cookie_values(:value => "") url = URI 'http://rubyforge.org/' # Add one cookie with an expiration date in the future cookie = Mechanize::Cookie.new(values) @jar.add(url, cookie) assert_equal(1, @jar.cookies(url).length) @jar.add url, Mechanize::Cookie.new(values.merge(:domain => 'RuByForge.Org', :name => 'aaron')) assert_equal(2, @jar.cookies(url).length) url2 = URI 'http://RuByFoRgE.oRg/' assert_equal(2, @jar.cookies(url2).length) end def test_add_future_cookies url = URI 'http://rubyforge.org/' # Add one cookie with an expiration date in the future cookie = Mechanize::Cookie.new(cookie_values) @jar.add(url, cookie) assert_equal(1, @jar.cookies(url).length) # Add the same cookie, and we should still only have one @jar.add(url, Mechanize::Cookie.new(cookie_values)) assert_equal(1, @jar.cookies(url).length) # Make sure we can get the cookie from different paths assert_equal(1, @jar.cookies(URI('http://rubyforge.org/login')).length) # Make sure we can't get the cookie from different domains assert_equal(0, @jar.cookies(URI('http://google.com/')).length) end def test_add_multiple_cookies url = URI 'http://rubyforge.org/' # Add one cookie with an expiration date in the future cookie = 
Mechanize::Cookie.new(cookie_values) @jar.add(url, cookie) assert_equal(1, @jar.cookies(url).length) # Add the same cookie, and we should still only have one @jar.add(url, Mechanize::Cookie.new(cookie_values(:name => 'Baz'))) assert_equal(2, @jar.cookies(url).length) # Make sure we can get the cookie from different paths assert_equal(2, @jar.cookies(URI('http://rubyforge.org/login')).length) # Make sure we can't get the cookie from different domains assert_equal(0, @jar.cookies(URI('http://google.com/')).length) end def test_add_rejects_cookies_that_do_not_contain_an_embedded_dot url = URI 'http://rubyforge.org/' tld_cookie = Mechanize::Cookie.new(cookie_values(:domain => '.org')) @jar.add(url, tld_cookie) # single dot domain is now treated as no domain # single_dot_cookie = Mechanize::Cookie.new(cookie_values(:domain => '.')) # @jar.add(url, single_dot_cookie) assert_equal(0, @jar.cookies(url).length) end def test_fall_back_rules_for_local_domains url = URI 'http://www.example.local' tld_cookie = Mechanize::Cookie.new(cookie_values(:domain => '.local')) @jar.add(url, tld_cookie) assert_equal(0, @jar.cookies(url).length) sld_cookie = Mechanize::Cookie.new(cookie_values(:domain => '.example.local')) @jar.add(url, sld_cookie) assert_equal(1, @jar.cookies(url).length) end def test_add_makes_exception_for_localhost url = URI 'http://localhost' tld_cookie = Mechanize::Cookie.new(cookie_values(:domain => 'localhost')) @jar.add(url, tld_cookie) assert_equal(1, @jar.cookies(url).length) end def test_add_cookie_for_the_parent_domain url = URI 'http://x.foo.com' cookie = Mechanize::Cookie.new(cookie_values(:domain => '.foo.com')) @jar.add(url, cookie) assert_equal(1, @jar.cookies(url).length) end def test_add_does_not_reject_cookies_from_a_nested_subdomain url = URI 'http://y.x.foo.com' cookie = Mechanize::Cookie.new(cookie_values(:domain => '.foo.com')) @jar.add(url, cookie) assert_equal(1, @jar.cookies(url).length) end def 
test_cookie_without_leading_dot_does_not_cause_substring_match url = URI 'http://arubyforge.org/' cookie = Mechanize::Cookie.new(cookie_values(:domain => 'rubyforge.org')) @jar.add(url, cookie) assert_equal(0, @jar.cookies(url).length) end def test_cookie_without_leading_dot_matches_subdomains url = URI 'http://admin.rubyforge.org/' cookie = Mechanize::Cookie.new(cookie_values(:domain => 'rubyforge.org')) @jar.add(url, cookie) assert_equal(1, @jar.cookies(url).length) end def test_cookies_with_leading_dot_match_subdomains url = URI 'http://admin.rubyforge.org/' @jar.add(url, Mechanize::Cookie.new(cookie_values(:domain => '.rubyforge.org'))) assert_equal(1, @jar.cookies(url).length) end def test_cookies_with_leading_dot_match_parent_domains url = URI 'http://rubyforge.org/' @jar.add(url, Mechanize::Cookie.new(cookie_values(:domain => '.rubyforge.org'))) assert_equal(1, @jar.cookies(url).length) end def test_cookies_with_leading_dot_match_parent_domains_exactly url = URI 'http://arubyforge.org/' @jar.add(url, Mechanize::Cookie.new(cookie_values(:domain => '.rubyforge.org'))) assert_equal(0, @jar.cookies(url).length) end def test_cookie_for_ipv4_address_matches_the_exact_ipaddress url = URI 'http://192.168.0.1/' cookie = Mechanize::Cookie.new(cookie_values(:domain => '192.168.0.1')) @jar.add(url, cookie) assert_equal(1, @jar.cookies(url).length) end def test_cookie_for_ipv4_address_does_not_cause_subdomain_match url = URI 'http://192.168.0.1/' cookie = Mechanize::Cookie.new(cookie_values(:domain => '.0.1')) @jar.add(url, cookie) assert_equal(0, @jar.cookies(url).length) end def test_cookie_for_ipv6_address_matches_the_exact_ipaddress url = URI 'http://[fe80::0123:4567:89ab:cdef]/' cookie = Mechanize::Cookie.new(cookie_values(:domain => '[fe80::0123:4567:89ab:cdef]')) @jar.add(url, cookie) assert_equal(1, @jar.cookies(url).length) end def test_cookies_dot url = URI 'http://www.host.example/' @jar.add(url, Mechanize::Cookie.new(cookie_values(:domain => 
'www.host.example'))) url = URI 'http://wwwxhost.example/' assert_equal(0, @jar.cookies(url).length) end def test_clear_bang url = URI 'http://rubyforge.org/' # Add one cookie with an expiration date in the future cookie = Mechanize::Cookie.new(cookie_values) @jar.add(url, cookie) @jar.add(url, Mechanize::Cookie.new(cookie_values(:name => 'Baz'))) assert_equal(2, @jar.cookies(url).length) @jar.clear! assert_equal(0, @jar.cookies(url).length) end def test_save_cookies_yaml url = URI 'http://rubyforge.org/' # Add one cookie with an expiration date in the future cookie = Mechanize::Cookie.new(cookie_values) s_cookie = Mechanize::Cookie.new(cookie_values(:name => 'Bar', :expires => nil, :session => true)) @jar.add(url, cookie) @jar.add(url, s_cookie) @jar.add(url, Mechanize::Cookie.new(cookie_values(:name => 'Baz'))) assert_equal(3, @jar.cookies(url).length) in_tmpdir do value = @jar.save_as("cookies.yml") assert_same @jar, value jar = Mechanize::CookieJar.new jar.load("cookies.yml") assert_equal(2, jar.cookies(url).length) end assert_equal(3, @jar.cookies(url).length) end def test_save_session_cookies_yaml url = URI 'http://rubyforge.org/' # Add one cookie with an expiration date in the future cookie = Mechanize::Cookie.new(cookie_values) s_cookie = Mechanize::Cookie.new(cookie_values(:name => 'Bar', :expires => nil, :session => true)) @jar.add(url, cookie) @jar.add(url, s_cookie) @jar.add(url, Mechanize::Cookie.new(cookie_values(:name => 'Baz'))) assert_equal(3, @jar.cookies(url).length) in_tmpdir do @jar.save_as("cookies.yml", :format => :yaml, :session => true) jar = Mechanize::CookieJar.new jar.load("cookies.yml") assert_equal(3, jar.cookies(url).length) end assert_equal(3, @jar.cookies(url).length) end def test_save_cookies_cookiestxt url = URI 'http://rubyforge.org/' # Add one cookie with an expiration date in the future cookie = Mechanize::Cookie.new(cookie_values) s_cookie = Mechanize::Cookie.new(cookie_values(:name => 'Bar', :expires => nil, :session => 
true)) @jar.add(url, cookie) @jar.add(url, s_cookie) @jar.add(url, Mechanize::Cookie.new(cookie_values(:name => 'Baz'))) assert_equal(3, @jar.cookies(url).length) in_tmpdir do @jar.save_as("cookies.txt", :cookiestxt) assert_match(/\A# (?:Netscape )?HTTP Cookie File$/, File.read("cookies.txt")) jar = Mechanize::CookieJar.new jar.load("cookies.txt", :cookiestxt) assert_equal(2, jar.cookies(url).length) end in_tmpdir do @jar.save_as("cookies.txt", :cookiestxt, :session => true) assert_match(/\A# (?:Netscape )?HTTP Cookie File$/, File.read("cookies.txt")) jar = Mechanize::CookieJar.new jar.load("cookies.txt", :cookiestxt) assert_equal(3, jar.cookies(url).length) end assert_equal(3, @jar.cookies(url).length) end def test_expire_cookies url = URI 'http://rubyforge.org/' # Add one cookie with an expiration date in the future cookie = Mechanize::Cookie.new(cookie_values) @jar.add(url, cookie) assert_equal(1, @jar.cookies(url).length) # Add a second cookie @jar.add(url, Mechanize::Cookie.new(cookie_values(:name => 'Baz'))) assert_equal(2, @jar.cookies(url).length) # Make sure we can get the cookie from different paths assert_equal(2, @jar.cookies(URI('http://rubyforge.org/login')).length) # Expire the first cookie @jar.add(url, Mechanize::Cookie.new( cookie_values(:expires => Time.now - (10 * 86400)))) assert_equal(1, @jar.cookies(url).length) # Expire the second cookie @jar.add(url, Mechanize::Cookie.new( cookie_values( :name => 'Baz', :expires => Time.now - (10 * 86400)))) assert_equal(0, @jar.cookies(url).length) end def test_session_cookies values = cookie_values(:expires => nil) url = URI 'http://rubyforge.org/' # Add one cookie with an expiration date in the future cookie = Mechanize::Cookie.new(values) @jar.add(url, cookie) assert_equal(1, @jar.cookies(url).length) # Add a second cookie @jar.add(url, Mechanize::Cookie.new(values.merge(:name => 'Baz'))) assert_equal(2, @jar.cookies(url).length) # Make sure we can get the cookie from different paths assert_equal(2, 
@jar.cookies(URI('http://rubyforge.org/login')).length) # Expire the first cookie @jar.add(url, Mechanize::Cookie.new(values.merge(:expires => Time.now - (10 * 86400)))) assert_equal(1, @jar.cookies(url).length) # Expire the second cookie @jar.add(url, Mechanize::Cookie.new( values.merge(:name => 'Baz', :expires => Time.now - (10 * 86400)))) assert_equal(0, @jar.cookies(url).length) # When given a URI with a blank path, CookieJar#cookies should return # cookies with the path '/': url = URI 'http://rubyforge.org' assert_equal '', url.path assert_equal(0, @jar.cookies(url).length) # Now add a cookie with the path set to '/': @jar.add(url, Mechanize::Cookie.new(values.merge( :name => 'has_root_path', :path => '/'))) assert_equal(1, @jar.cookies(url).length) end def test_paths values = cookie_values(:path => "/login", :expires => nil) url = URI 'http://rubyforge.org/login' # Add one cookie with an expiration date in the future cookie = Mechanize::Cookie.new(values) @jar.add(url, cookie) assert_equal(1, @jar.cookies(url).length) # Add a second cookie @jar.add(url, Mechanize::Cookie.new(values.merge( :name => 'Baz' ))) assert_equal(2, @jar.cookies(url).length) # Make sure we don't get the cookie in a different path assert_equal(0, @jar.cookies(URI('http://rubyforge.org/hello')).length) assert_equal(0, @jar.cookies(URI('http://rubyforge.org/')).length) # Expire the first cookie @jar.add(url, Mechanize::Cookie.new(values.merge( :expires => Time.now - (10 * 86400)))) assert_equal(1, @jar.cookies(url).length) # Expire the second cookie @jar.add(url, Mechanize::Cookie.new(values.merge( :name => 'Baz', :expires => Time.now - (10 * 86400)))) assert_equal(0, @jar.cookies(url).length) end def test_save_and_read_cookiestxt url = URI 'http://rubyforge.org/' # Add one cookie with an expiration date in the future cookie = Mechanize::Cookie.new(cookie_values) @jar.add(url, cookie) @jar.add(url, Mechanize::Cookie.new(cookie_values(:name => 'Baz'))) assert_equal(2, 
@jar.cookies(url).length) in_tmpdir do @jar.save_as("cookies.txt", :cookiestxt) @jar.clear! @jar.load("cookies.txt", :cookiestxt) end assert_equal(2, @jar.cookies(url).length) end def test_save_and_read_cookiestxt_with_session_cookies url = URI 'http://rubyforge.org/' @jar.add(url, Mechanize::Cookie.new(cookie_values(:expires => nil))) in_tmpdir do @jar.save_as("cookies.txt", :cookiestxt) @jar.clear! @jar.load("cookies.txt", :cookiestxt) end assert_equal(0, @jar.cookies(url).length) end def test_save_and_read_expired_cookies url = URI 'http://rubyforge.org/' @jar.jar['rubyforge.org'] = {} @jar.add url, Mechanize::Cookie.new(cookie_values) # HACK no asertion end def test_ssl_cookies # thanks to michal "ocher" ochman for reporting the bug responsible for this test. values = cookie_values(:expires => nil) values_ssl = values.merge(:name => 'Baz', :domain => "#{values[:domain]}:443") url = URI 'https://rubyforge.org/login' cookie = Mechanize::Cookie.new(values) @jar.add(url, cookie) assert_equal(1, @jar.cookies(url).length, "did not handle SSL cookie") cookie = Mechanize::Cookie.new(values_ssl) @jar.add(url, cookie) assert_equal(2, @jar.cookies(url).length, "did not handle SSL cookie with :443") end def test_secure_cookie nurl = URI 'http://rubyforge.org/login' surl = URI 'https://rubyforge.org/login' ncookie = Mechanize::Cookie.new(cookie_values(:name => 'Foo1')) scookie = Mechanize::Cookie.new(cookie_values(:name => 'Foo2', :secure => true)) @jar.add(nurl, ncookie) @jar.add(nurl, scookie) @jar.add(surl, ncookie) @jar.add(surl, scookie) assert_equal('Foo1', @jar.cookies(nurl).map { |c| c.name }.sort.join(' ') ) assert_equal('Foo1 Foo2', @jar.cookies(surl).map { |c| c.name }.sort.join(' ') ) end def test_save_cookies_cookiestxt_subdomain top_url = URI 'http://rubyforge.org/' subdomain_url = URI 'http://admin.rubyforge.org/' # cookie1 is for *.rubyforge.org; cookie2 is only for rubyforge.org, no subdomains cookie1 = Mechanize::Cookie.new(cookie_values) cookie2 = 
Mechanize::Cookie.new(cookie_values(:name => 'Boo', :for_domain => false)) @jar.add(top_url, cookie1) @jar.add(top_url, cookie2) assert_equal(2, @jar.cookies(top_url).length) assert_equal(1, @jar.cookies(subdomain_url).length) in_tmpdir do @jar.save_as("cookies.txt", :cookiestxt) jar = Mechanize::CookieJar.new jar.load("cookies.txt", :cookiestxt) # HACK test the format assert_equal(2, jar.cookies(top_url).length) assert_equal(1, jar.cookies(subdomain_url).length) # Check that we actually wrote the file correctly (not just that we were # able to read what we wrote): # # * Cookies that only match exactly the domain specified must not have a # leading dot, and must have FALSE as the second field. # * Cookies that match subdomains may have a leading dot, and must have # TRUE as the second field. cookies_txt = File.readlines("cookies.txt") assert_equal(1, cookies_txt.grep( /^rubyforge\.org\tFALSE/ ).length) assert_equal(1, cookies_txt.grep( /^\.rubyforge\.org\tTRUE/ ).length) end assert_equal(2, @jar.cookies(top_url).length) assert_equal(1, @jar.cookies(subdomain_url).length) end end mechanize-2.7.5/test/test_mechanize_util.rb0000644000004100000410000000765412772546476021123 0ustar www-datawww-data# coding: utf-8 require 'mechanize/test_case' class TestMechanizeUtil < Mechanize::TestCase INPUTTED_VALUE = "テスト" # "test" in Japanese UTF-8 encoding CONTENT_ENCODING = 'Shift_JIS' # one of Japanese encoding ENCODED_VALUE = "\x83\x65\x83\x58\x83\x67".force_encoding(::Encoding::SHIFT_JIS) # "test" in Japanese Shift_JIS encoding ENCODING_ERRORS = [EncodingError, Encoding::ConverterNotFoundError] # and so on ERROR_LOG_MESSAGE = /from_native_charset: Encoding::ConverterNotFoundError: form encoding: "UTF-eight"/ INVALID_ENCODING = 'UTF-eight' def setup super @MU = Mechanize::Util @result = "not set" end def test_from_native_charset @result = @MU.from_native_charset(INPUTTED_VALUE, CONTENT_ENCODING) assert_equal ENCODED_VALUE, @result end def 
test_from_native_charset_returns_nil_when_no_string @result = @MU.from_native_charset(nil, CONTENT_ENCODING) assert_equal nil, @result end def test_from_native_charset_doesnot_convert_when_no_encoding @result = @MU.from_native_charset(INPUTTED_VALUE, nil) refute_equal ENCODED_VALUE, @result assert_equal INPUTTED_VALUE, @result end def test_from_native_charset_doesnot_convert_when_not_nokogiri parser = Mechanize.html_parser Mechanize.html_parser = 'Another HTML Parser' @result = @MU.from_native_charset(INPUTTED_VALUE, CONTENT_ENCODING) refute_equal ENCODED_VALUE, @result assert_equal INPUTTED_VALUE, @result ensure Mechanize.html_parser = parser end def test_from_native_charset_raises_error_with_bad_encoding assert_raises(*ENCODING_ERRORS) do @MU.from_native_charset(INPUTTED_VALUE, INVALID_ENCODING) end end def test_from_native_charset_suppress_encoding_error_when_3rd_arg_is_true @MU.from_native_charset(INPUTTED_VALUE, INVALID_ENCODING, true) # HACK no assertion end def test_from_native_charset_doesnot_convert_when_encoding_error_raised_and_ignored @result = @MU.from_native_charset(INPUTTED_VALUE, INVALID_ENCODING, true) refute_equal ENCODED_VALUE, @result assert_equal INPUTTED_VALUE, @result end def test_from_native_charset_logs_form_when_encoding_error_raised sio = StringIO.new("") log = Logger.new(sio) log.level = Logger::DEBUG assert_raises(*ENCODING_ERRORS) do @MU.from_native_charset(INPUTTED_VALUE, INVALID_ENCODING, nil, log) end assert_match ERROR_LOG_MESSAGE, sio.string end def test_from_native_charset_logs_form_when_encoding_error_is_ignored sio = StringIO.new("") log = Logger.new(sio) log.level = Logger::DEBUG @MU.from_native_charset(INPUTTED_VALUE, INVALID_ENCODING, true, log) assert_match ERROR_LOG_MESSAGE, sio.string end def test_self_html_unescape_entity assert_equal '&', @MU::html_unescape('&') assert_equal '&', @MU::html_unescape('&') end def test_uri_escape assert_equal "%25", @MU.uri_escape("%") assert_equal "%", @MU.uri_escape("%", /[^%]/) end def 
test_build_query_string_simple input_params = [ [:ids, 1], [:action, 'delete'], [:ids, 5], ] expected_params = [ ['ids', '1'], ['action', 'delete'], ['ids', '5'], ] query = @MU.build_query_string(input_params) assert_equal expected_params, URI.decode_www_form(query) end def test_build_query_string_complex input_params = { number: 7, name: "\u{6B66}\u{8005}", "ids[]" => [1, 3, 5, 7], words: ["Sing", "Now!"], params: { x: "50%", y: "100%", t: [80, 160] }, } expected_params = [ ['number', '7'], ['name', "\u{6B66}\u{8005}"], ['ids[]', '1'], ['ids[]', '3'], ['ids[]', '5'], ['ids[]', '7'], ['words', 'Sing'], ['words', 'Now!'], ['params[x]', '50%'], ['params[y]', '100%'], ['params[t]', '80'], ['params[t]', '160'], ] query = @MU.build_query_string(input_params) assert_equal expected_params, URI.decode_www_form(query) end end mechanize-2.7.5/test/test_mechanize_form_multi_select_list.rb0000644000004100000410000000366412772546476024712 0ustar www-datawww-datarequire 'mechanize/test_case' class TestMechanizeFormMultiSelectList < Mechanize::TestCase def setup super page = html_page <<-BODY
    BODY form = page.forms.first @select = form.fields.first end def test_inspect assert_match "value: #{%w[2]}", @select.inspect end def test_inspect_select_all @select.select_all assert_match "value: #{%w[1 2 3 4 5 6]}", @select.inspect end def test_option_with option = @select.option_with :value => '1' assert_equal '1', option.value end def test_options_with options = @select.options_with :value => /[12]/ assert_equal 2, options.length end def test_query_value assert_equal [%w[select 2]], @select.query_value @select.options.last.click assert_equal [%w[select 2], %w[select 6]], @select.query_value end def test_query_value_empty @select.options.last.click @select.options.last.instance_variable_set :@value, '' assert_equal [%w[select 2], ['select', '']], @select.query_value end def test_select_all @select.select_all assert_equal %w[1 2 3 4 5 6], @select.value end def test_select_none @select.select_none assert_empty @select.value end def test_selected_options assert_equal [@select.options[1]], @select.selected_options @select.options.last.click assert_equal [@select.options[1], @select.options.last], @select.selected_options end def test_value assert_equal %w[2], @select.value end def test_value_equals @select.value = %w[a 1 2] assert_equal %w[a 1 2], @select.value end end mechanize-2.7.5/test/test_mechanize_http_auth_realm.rb0000644000004100000410000000206312772546476023313 0ustar www-datawww-datarequire 'mechanize/test_case' class TestMechanizeHttpAuthRealm < Mechanize::TestCase def setup super @uri = URI 'http://example/' @AR = Mechanize::HTTP::AuthRealm @realm = @AR.new 'Digest', @uri, 'r' end def test_initialize assert_equal 'r', @realm.realm realm = @AR.new 'Digest', @uri, 'R' refute_equal 'r', realm.realm realm = @AR.new 'Digest', @uri, 'R' assert_equal 'R', realm.realm realm = @AR.new 'Digest', @uri, nil assert_nil realm.realm end def test_equals2 other = @realm.dup assert_equal @realm, other other = @AR.new 'Basic', @uri, 'r' refute_equal @realm, other 
other = @AR.new 'Digest', URI('http://other.example/'), 'r' refute_equal @realm, other other = @AR.new 'Digest', @uri, 'R' refute_equal @realm, other other = @AR.new 'Digest', @uri, 's' refute_equal @realm, other end def test_hash h = {} h[@realm] = 1 other = @realm.dup assert_equal 1, h[other] other = @AR.new 'Basic', @uri, 'r' assert_nil h[other] end end mechanize-2.7.5/test/test_mechanize_page_meta_refresh.rb0000644000004100000410000000773212772546476023603 0ustar www-datawww-datarequire 'mechanize/test_case' class TestMechanizePageMetaRefresh < Mechanize::TestCase def setup super @MR = Mechanize::Page::MetaRefresh @uri = URI 'http://example/here/' end def util_page delay, uri body = <<-BODY BODY Mechanize::Page.new(@uri, nil, body, 200, @mech) end def util_meta_refresh page node = page.search('meta').first @MR.from_node node, page end def test_class_parse delay, uri, link_self = @MR.parse "0; url=http://localhost:8080/path", @uri assert_equal "0", delay assert_equal "http://localhost:8080/path", uri.to_s refute link_self delay, uri, link_self = @MR.parse "100.001; url=http://localhost:8080/path", @uri assert_equal "100.001", delay assert_equal "http://localhost:8080/path", uri.to_s refute link_self delay, uri, link_self = @MR.parse "0; url='http://localhost:8080/path'", @uri assert_equal "0", delay assert_equal "http://localhost:8080/path", uri.to_s refute link_self delay, uri, link_self = @MR.parse "0; url=\"http://localhost:8080/path\"", @uri assert_equal "0", delay assert_equal "http://localhost:8080/path", uri.to_s refute link_self delay, uri, link_self = @MR.parse "0; url=", @uri assert_equal "0", delay assert_equal "http://example/here/", uri.to_s assert link_self delay, uri, link_self = @MR.parse "0", @uri assert_equal "0", delay assert_equal "http://example/here/", uri.to_s assert link_self delay, uri, link_self = @MR.parse " 0; ", @uri assert_equal "0", delay assert_equal "http://example/here/", uri.to_s assert link_self delay, uri, link_self = 
@MR.parse " 0 ; ", @uri assert_equal "0", delay assert_equal "http://example/here/", uri.to_s assert link_self delay, uri, link_self = @MR.parse "0; UrL=http://localhost:8080/path", @uri assert_equal "0", delay assert_equal "http://localhost:8080/path", uri.to_s refute link_self delay, uri, link_self = @MR.parse "0 ; UrL = http://localhost:8080/path", @uri assert_equal "0", delay assert_equal "http://localhost:8080/path", uri.to_s refute link_self end def test_class_parse_funky delay, uri, link_self = @MR.parse "0; url=/funky?Welcome<%2Fb>", @uri assert_equal "0", delay assert_equal "http://example/funky?%3Cb%3EWelcome%3C%2Fb%3E", uri.to_s refute link_self end def test_class_from_node page = util_page 5, 'http://b.example' link = util_meta_refresh page assert_equal 5, link.delay assert_equal 'http://b.example', link.href page = util_page 5, 'http://example/a' link = util_meta_refresh page assert_equal 5, link.delay assert_equal 'http://example/a', link.href page = util_page 5, 'test' link = util_meta_refresh page assert_equal 5, link.delay assert_equal 'test', link.href page = util_page 5, '/test' link = util_meta_refresh page assert_equal 5, link.delay assert_equal '/test', link.href page = util_page 5, nil link = util_meta_refresh page assert_equal 5, link.delay assert_equal nil, link.href page = util_page 5, @uri link = util_meta_refresh page assert_equal 5, link.delay assert_equal 'http://example/here/', link.href end def test_class_from_node_no_content body = <<-BODY BODY page = Mechanize::Page.new(@uri, nil, body, 200, @mech) assert_nil util_meta_refresh page end def test_class_from_node_not_refresh body = <<-BODY BODY page = Mechanize::Page.new(@uri, nil, body, 200, @mech) assert_nil util_meta_refresh page end def test_meta_refresh_click_sends_no_referer page = util_page 0, '/referer' link = util_meta_refresh page refreshed = link.click assert_equal '', refreshed.body end end 
# --- archive entry header (tar residue, not Ruby; kept verbatim) ---
# mechanize-2.7.5/test/test_mechanize_file.rb0000644000004100000410000000465412772546476021062 0ustar www-datawww-data
require 'mechanize/test_case'

# Exercises Mechanize::File#save, #save! and #filename. Saving happens inside a
# temporary directory so nothing is left behind in the working tree.
class TestMechanizeFile < Mechanize::TestCase

  def setup
    super
    @parser = Mechanize::File
  end

  # Saving repeatedly under the same name must not overwrite: the 2nd and 3rd
  # saves are expected to land in "test.html.1" and "test.html.2".
  def test_save
    uri = URI 'http://example/name.html'
    page = Mechanize::File.new uri, nil, '0123456789'

    Dir.mktmpdir do |dir|
      Dir.chdir dir do
        filename = page.save 'test.html'

        assert File.exist? 'test.html'
        assert_equal '0123456789', File.read('test.html')
        assert_equal "test.html", filename

        filename = page.save 'test.html'

        assert File.exist? 'test.html.1'
        assert_equal '0123456789', File.read('test.html.1')
        assert_equal "test.html.1", filename

        filename = page.save 'test.html'

        assert File.exist? 'test.html.2'
        assert_equal '0123456789', File.read('test.html.2')
        assert_equal "test.html.2", filename
      end
    end
  end

  # With no argument, save derives the file name from the URI.
  def test_save_default
    uri = URI 'http://example/test.html'
    page = Mechanize::File.new uri, nil, ''

    Dir.mktmpdir do |dir|
      Dir.chdir dir do
        filename = page.save

        assert File.exist? 'test.html'
        assert_equal "test.html", filename

        filename = page.save

        assert File.exist? 'test.html.1'
        assert_equal "test.html.1", filename

        filename = page.save

        assert File.exist? 'test.html.2'
        assert_equal "test.html.2", filename
      end
    end
  end

  # A ".." path segment in the URI must not change where the file is written.
  def test_save_default_dots
    uri = URI 'http://localhost/../test.html'
    page = Mechanize::File.new uri, nil, ''

    Dir.mktmpdir do |dir|
      Dir.chdir dir do
        filename = page.save
        assert File.exist? 'test.html'
        assert_equal "test.html", filename

        filename = page.save
        assert File.exist? 'test.html.1'
        assert_equal "test.html.1", filename
      end
    end
  end

  def test_filename
    uri = URI 'http://localhost/test.html'
    page = Mechanize::File.new uri, nil, ''

    assert_equal "test.html", page.filename
  end

  # save! overwrites in place: no numbered sibling may appear.
  def test_save_overwrite
    uri = URI 'http://example/test.html'
    page = Mechanize::File.new uri, nil, ''

    Dir.mktmpdir do |dir|
      Dir.chdir dir do
        filename = page.save 'test.html'

        assert File.exist? 'test.html'
        assert_equal "test.html", filename

        filename = page.save! 'test.html'

        assert File.exist? 'test.html'
        refute File.exist? 'test.html.1'
        assert_equal "test.html", filename
      end
    end
  end

end

# --- archive entry header (tar residue, not Ruby; kept verbatim) ---
# mechanize-2.7.5/test/test_mechanize_xml_file.rb0000644000004100000410000000107712772546476021736 0ustar www-datawww-data
require 'mechanize/test_case'

# Exercises Mechanize::XmlFile#xml, #search and #at.
class TestMechanizeXmlFile < Mechanize::TestCase

  def setup
    super

    uri = URI 'http://example.com/foo.xml'

    # NOTE(review): only the text "Ruby Perl" survives in this dump, yet the
    # tests query 'language' elements; presumably the XML tags were lost in
    # extraction - restore them from the upstream file.
    @xml = Mechanize::XmlFile.new uri, nil, <<-XML
Ruby Perl
    XML
  end

  def test_xml
    assert_kind_of Nokogiri::XML::Document, @xml.xml
  end

  def test_search
    assert_equal ['Ruby', 'Perl'], @xml.search('language').map { |n| n.text }
  end

  def test_at
    assert_equal 'Perl', @xml.at('//language[2]').text
  end
end

# --- archive entry header (tar residue, not Ruby; kept verbatim) ---
# mechanize-2.7.5/test/test_mechanize_page_image.rb0000644000004100000410000001315612772546476022216 0ustar www-datawww-data
require 'mechanize/test_case'

# Exercises Mechanize::Page::Image: attribute readers, caption, url, extname,
# mime_type, fetch (including the Referer header sent) and image_referer.
#
# NOTE(review): the html_page fixtures below are empty strings/heredocs in this
# dump although the tests call page.images.first; presumably the <img> markup
# was lost in extraction - restore it from the upstream file.
class TestMechanizePageImage < Mechanize::TestCase

  def setup
    super

    @uri = URI 'http://example/'
    @src = (@uri + 'a.jpg').to_s

    @empty_page = Mechanize::Page.new(@uri, nil, '', 200, @mech)
  end

  # Builds a Page::Image from an 'img' node carrying +attributes+, attached to
  # the empty page created in setup.
  def img(attributes)
    img = node 'img', attributes
    Mechanize::Page::Image.new img, @empty_page
  end

  def test_initialize
    image = img("src" => "a.jpg", "alt" => "alt", "width" => "100",
                "height" => "200", "title" => "title", "id" => "id",
                "class" => "class")

    assert_equal "a.jpg", image.src
    assert_equal "alt",   image.alt
    assert_equal "100",   image.width
    assert_equal "200",   image.height
    assert_equal "title", image.title
    assert_equal "id",    image.dom_id
    assert_equal "class", image.dom_class
  end

  def test_initialize_no_attributes
    image = img({})

    assert_nil image.src
    assert_nil image.alt
    assert_nil image.width
    assert_nil image.height
    assert_nil image.title
    assert_nil image.dom_id
    assert_nil image.dom_class
  end

  # When both alt and title are present, the caption is the title.
  def test_caption
    assert_equal "", img("src" => @src).caption
    assert_equal "alt", img("src" => @src, "alt" => "alt").caption
    assert_equal "title", img("src" => @src, "title" => "title").caption
    assert_equal "title",
                 img("src" => @src, "alt" => "alt", "title" => "title").caption
  end

  def test_url
    assert_equal ".jpg", img('src' => @src).extname
    assert_equal "http://example/a.jpg", img('src' => @src).url.to_s
    assert_equal "http://example/a%20.jpg",
                 img('src' => 'http://example/a .jpg' ).url.to_s
  end

  def test_url_base
    page = html_page <<-BODY
    BODY

    assert_equal "http://other.example/a.jpg", page.images.first.url
  end

  def test_extname
    assert_equal ".jpg", img("src" => "a.jpg").extname
    assert_equal ".PNG", img("src" => "a.PNG").extname
    assert_equal ".aaa", img("src" => "unknown.aaa").extname
    assert_equal "", img("src" => "nosuffiximage").extname

    assert_nil img("width" => "1", "height" => "1").extname

    assert_equal ".jpg", img("src" => "a.jpg?cache_buster").extname
  end

  def test_mime_type
    assert_equal "image/jpeg", img("src" => "a.jpg").mime_type
    assert_equal "image/png", img("src" => "a.PNG").mime_type

    assert_nil img("src" => "unknown.aaa").mime_type
    assert_nil img("src" => "nosuffiximage").mime_type
  end

  def test_fetch
    image = img "src" => "http://localhost/button.jpg"

    fetched = image.fetch

    assert_equal fetched, @mech.page
    assert_equal "http://localhost/button.jpg", fetched.uri.to_s
    assert_equal "http://example/", requests.first['Referer']
    assert @mech.visited? "http://localhost/button.jpg"
  end

  def test_fetch_referer_http_page_rel_src
    #            | rel-src http-src https-src
    # http page  | *page*  page     page
    # https page | page    empty    empty
    page = html_page ''
    page.images.first.fetch

    assert_equal 'http', page.uri.scheme
    assert_equal true, page.images.first.relative?
    assert_equal "http://example/", requests.first['Referer']
  end

  def test_fetch_referer_http_page_abs_src
    #            | rel-src http-src https-src
    # http page  | page    *page*   *page*
    # https page | page    empty    empty
    page = html_page ''
    page.images.first.fetch

    assert_equal 'http', page.uri.scheme
    assert_equal false, page.images.first.relative?
    assert_equal "http://example/", requests.first['Referer']
  end

  def test_fetch_referer_https_page_rel_src
    #            | rel-src http-src https-src
    # http page  | page    page     page
    # https page | *page*  empty    empty
    page = html_page ''
    page.uri = URI 'https://example/'
    page.images.first.fetch

    assert_equal 'https', page.uri.scheme
    assert_equal true, page.images.first.relative?
    assert_equal "https://example/", requests.first['Referer']
  end

  def test_fetch_referer_https_page_abs_src
    #            | rel-src http-src https-src
    # http page  | page    page     page
    # https page | page    *empty*  *empty*
    page = html_page ''
    page.uri = URI 'https://example/'
    page.images.first.fetch

    assert_equal 'https', page.uri.scheme
    assert_equal false, page.images.first.relative?
    # assert_nil instead of assert_equal nil: Minitest deprecates the latter.
    assert_nil requests.first['Referer']
  end

  def test_image_referer_http_page_abs_src
    page = html_page ''

    assert_equal 'http', page.uri.scheme
    assert_equal @uri, page.images.first.image_referer.uri
  end

  def test_image_referer_http_page_rel_src
    page = html_page ''

    assert_equal 'http', page.uri.scheme
    assert_equal @uri, page.images.first.image_referer.uri
  end

  def test_image_referer_https_page_abs_src
    page = html_page ''
    page.uri = URI 'https://example/'

    assert_equal 'https', page.uri.scheme
    assert_nil page.images.first.image_referer.uri
  end

  def test_image_referer_https_page_rel_src
    page = html_page ''
    page.uri = URI 'https://example/'

    assert_equal 'https', page.uri.scheme
    assert_equal URI('https://example/'), page.images.first.image_referer.uri
  end

  def test_no_src_attribute
    page = html_page ''
    page.uri = URI 'https://example/'

    assert_equal URI('https://example/'), page.images.first.url
  end

end

# --- archive entry header (tar residue, not Ruby; kept verbatim) ---
# mechanize-2.7.5/test/test_mechanize_page_encoding.rb0000644000004100000410000001225712772546476022723 0ustar www-datawww-data
# -*- coding: utf-8 -*-
require 'mechanize/test_case'

# tests for Page encoding and charset and parsing
#
# NOTE(review): several +body+ fixtures below are empty strings in this dump
# while the assertions expect charsets such as 'META'; presumably the <meta>
# markup was lost in extraction - restore it from the upstream file.
class TestMechanizePageEncoding < Mechanize::TestCase

  MECH_ASCII_ENCODING = 'US-ASCII'

  def setup
    super

    @uri = URI('http://localhost/')
    @response_headers = { 'content-type' => 'text/html' }
    @body = 'hi'
  end

  # Builds a Mechanize::Page at @uri. A non-nil +body+ is forced to BINARY
  # encoding in place before being handed to the page.
  def util_page(body = @body, headers = @response_headers)
    Mechanize::Page.new @uri, headers,
                        body && body.force_encoding(Encoding::BINARY),
                        200, @mech
  end

  # The charset value is returned with its original letter case.
  def test_page_charset
    charset = Mechanize::Page.charset 'text/html;charset=vAlue'
    assert_equal 'vAlue', charset

    charset = Mechanize::Page.charset 'text/html;charset=vaLue, text/html'
    assert_equal 'vaLue', charset

    charset = Mechanize::Page.charset 'text/html ; charset = valUe, text/html'
    assert_equal 'valUe', charset
  end

  def test_page_charset_upcase
    charset = Mechanize::Page.charset 'TEXT/HTML;CHARSET=UTF-8'
    assert_equal 'UTF-8', charset
  end

  def test_page_charset_semicolon
    charset = Mechanize::Page.charset 'text/html;charset=UTF-8;'
    assert_equal 'UTF-8', charset
  end

  def test_page_charset_no_chaset_token
    charset = Mechanize::Page.charset 'text/html'
    assert_nil charset
  end

  def test_page_charset_returns_nil_when_charset_says_none
    charset = Mechanize::Page.charset 'text/html;charset=none'

    assert_nil charset
  end

  # With two charset parameters the first one wins.
  def test_page_charset_multiple
    charset = Mechanize::Page.charset 'text/html;charset=111;charset=222'

    assert_equal '111', charset
  end

  def test_page_response_header_charset
    headers = { 'content-type' => 'text/html;charset=HEADER' }
    charsets = Mechanize::Page.response_header_charset(headers)

    assert_equal ['HEADER'], charsets
  end

  def test_page_response_header_charset_no_token
    headers = {'content-type' => 'text/html'}
    charsets = Mechanize::Page.response_header_charset(headers)

    assert_equal [], charsets

    headers = {'X-My-Header' => 'hello'}
    charsets = Mechanize::Page.response_header_charset(headers)

    assert_equal [], charsets
  end

  def test_page_response_header_charset_wrong_header
    headers = { 'x-content-type' => 'text/html;charset=bogus' }
    charsets = Mechanize::Page.response_header_charset(headers)

    assert_equal [], charsets
  end

  def test_response_header_charset
    page = util_page nil, {'content-type' => 'text/html;charset=HEADER'}

    assert_equal ['HEADER'], page.response_header_charset
  end

  def test_page_meta_charset
    body = ''
    charsets = Mechanize::Page.meta_charset(body)

    assert_equal ['META'], charsets
  end

  def test_page_meta_charset_is_empty_when_no_charset_meta
    body = ''
    charsets = Mechanize::Page.meta_charset(body)
    assert_equal [], charsets
  end

  def test_page_meta_charset_no_content
    body = ''

    charsets = Mechanize::Page.meta_charset(body)

    assert_empty charsets
  end

  # Test to fix issue: https://github.com/sparklemotion/mechanize/issues/143
  def test_page_meta_charset_handles_whitespace
    body = ''
    charsets = Mechanize::Page.meta_charset(body)
    assert_equal ["iso-8859-1"], charsets
  end

  def test_meta_charset
    body = ''
    page = util_page body

    assert_equal ['META'], page.meta_charset
  end

  def test_detected_encoding
    page = util_page

    assert_equal MECH_ASCII_ENCODING, page.detected_encoding
  end

  # The candidate list must include the header charset, the meta charset, the
  # detected encoding and the agent's default encoding.
  def test_encodings
    response = {'content-type' => 'text/html;charset=HEADER'}
    body = ''
    @mech.default_encoding = 'DEFAULT'
    page = util_page body, response

    assert_equal true, page.encodings.include?('HEADER')
    assert_equal true, page.encodings.include?('META')
    assert_equal true, page.encodings.include?(MECH_ASCII_ENCODING)
    assert_equal true, page.encodings.include?('DEFAULT')
  end

  def test_parser_with_default_encoding
    # pre test
    assert_equal false, util_page.encodings.include?('Windows-1252')

    @mech.default_encoding = 'Windows-1252'
    page = util_page

    assert_equal true, page.encodings.include?('Windows-1252')
  end

  def test_parser_force_default_encoding
    @mech.default_encoding = 'Windows-1252'
    @mech.force_default_encoding = true
    page = util_page

    assert page.encodings.include? 'Windows-1252'
  end

  # An explicit page.encoding= must win even when the agent forces its default.
  def test_parser_encoding_equals_overwrites_force_default_encoding
    @mech.default_encoding = 'Windows-1252'
    @mech.force_default_encoding = true
    page = util_page

    assert_equal 'Windows-1252', page.encoding

    page.encoding = 'ISO-8859-2'

    assert_equal 'ISO-8859-2', page.encoding
  end

  def test_parser_encoding_when_searching_elements
    skip "Encoding not implemented" unless have_encoding?

    body = 'hi'
    page = util_page body, 'content-type' => 'text/html,charset=ISO-8859-1'

    result = page.search('#latin1')

    assert_equal Encoding::UTF_8, result.text.encoding
  end

end

# --- archive entry header (tar residue, not Ruby; kept verbatim) ---
# mechanize-2.7.5/test/test_mechanize_pluggable_parser.rb0000644000004100000410000000261512772546476023454 0ustar www-datawww-data
require 'mechanize/test_case'

# Exercises the agent's pluggable parser registry: lookup by content type via
# [] and #parser, and the csv/html/pdf/xml convenience writers.
class TestMechanizePluggableParser < Mechanize::TestCase

  def setup
    super

    @pp = @mech.pluggable_parser
  end

  def test_aref
    @pp['text/html'] = Mechanize::Download

    assert_equal Mechanize::Download, @pp['text/html']
  end

  def test_csv
    @pp.csv = Mechanize::Download

    assert_equal Mechanize::Download, @pp['text/csv']
  end

  def test_html
    assert_equal Mechanize::Page, @pp['text/html']

    @pp.html = Mechanize::Download

    assert_equal Mechanize::Download, @pp['text/html']
  end

  def test_parser
    assert_equal Mechanize::XmlFile, @pp.parser('text/xml')
    assert_equal Mechanize::File, @pp.parser(nil)
  end

  # 'x-image/x-png' is expected to resolve to the parser registered for
  # 'image/png'; a bare 'image' resolves to Mechanize::Image.
  def test_parser_mime
    @pp['image/png'] = :png

    assert_equal :png, @pp.parser('x-image/x-png')
    assert_equal :png, @pp.parser('image/png')
    assert_equal Mechanize::Image, @pp.parser('image')
  end

  # [] returns nil for an unregistered type while #parser falls back to
  # Mechanize::File.
  def test_parser_bogus
    assert_nil @pp['bogus']

    assert_equal Mechanize::File, @pp.parser('bogus')
  end

  def test_pdf
    @pp.pdf = Mechanize::Download

    assert_equal Mechanize::Download, @pp['application/pdf']
  end

  # xml= must update both the text/xml and application/xml entries.
  def test_xml
    assert_equal Mechanize::XmlFile, @pp['text/xml']
    assert_equal Mechanize::XmlFile, @pp['application/xml']

    @pp.xml = Mechanize::Download

    assert_equal Mechanize::Download, @pp['text/xml']
    assert_equal Mechanize::Download, @pp['application/xml']
  end

end
mechanize-2.7.5/test/test_mechanize_parser.rb0000644000004100000410000001477612772546476021445 0ustar www-datawww-datarequire 'mechanize/test_case' class TestMechanizeParser < Mechanize::TestCase class P include Mechanize::Parser attr_accessor :filename attr_accessor :response attr_accessor :uri def initialize @uri = URI 'http://example' @full_path = false end end def setup super @parser = P.new end def test_extract_filename @parser.response = {} assert_equal 'index.html', @parser.extract_filename end def test_extract_filename_content_disposition @parser.uri = URI 'http://example/foo' @parser.response = { 'content-disposition' => 'attachment; filename=genome.jpeg' } assert_equal 'genome.jpeg', @parser.extract_filename end def test_extract_filename_content_disposition_bad @parser.uri = URI 'http://example/foo' @parser.response = { 'content-disposition' => "inline; filename*=UTF-8''X%20Y.jpg" } assert_equal 'foo.html', @parser.extract_filename @parser.response = { 'content-disposition' => "inline; filename=\"\"" } assert_equal 'foo.html', @parser.extract_filename end def test_extract_filename_content_disposition_path @parser.uri = URI 'http://example' @parser.response = { 'content-disposition' => 'attachment; filename="../genome.jpeg"' } assert_equal 'example/genome.jpeg', @parser.extract_filename(true) @parser.response = { 'content-disposition' => 'attachment; filename="foo/genome.jpeg"' } assert_equal 'example/genome.jpeg', @parser.extract_filename(true) end def test_extract_filename_content_disposition_path_windows @parser.uri = URI 'http://example' @parser.response = { 'content-disposition' => 'attachment; filename="..\\\\genome.jpeg"' } assert_equal 'example/genome.jpeg', @parser.extract_filename(true) @parser.response = { 'content-disposition' => 'attachment; filename="foo\\\\genome.jpeg"' } assert_equal 'example/genome.jpeg', @parser.extract_filename(true) end def test_extract_filename_content_disposition_full_path @parser.uri = URI 'http://example/foo' 
@parser.response = { 'content-disposition' => 'attachment; filename=genome.jpeg' } assert_equal 'example/genome.jpeg', @parser.extract_filename(true) end def test_extract_filename_content_disposition_quoted @parser.uri = URI 'http://example' @parser.response = { 'content-disposition' => 'attachment; filename="some \"file\""' } assert_equal 'some__file_', @parser.extract_filename end def test_extract_filename_content_disposition_special @parser.uri = URI 'http://example/foo' @parser.response = { 'content-disposition' => 'attachment; filename="/\\\\<>:\\"|?*"' } assert_equal '_______', @parser.extract_filename chars = (0..12).map { |c| c.chr }.join chars += "\\\r" chars += (14..31).map { |c| c.chr }.join @parser.response = { 'content-disposition' => "attachment; filename=\"#{chars}\"" } assert_equal '_' * 32, @parser.extract_filename end def test_extract_filename_content_disposition_windows_special @parser.uri = URI 'http://example' windows_special = %w[ AUX COM1 COM2 COM3 COM4 COM5 COM6 COM7 COM8 COM9 CON LPT1 LPT2 LPT3 LPT4 LPT5 LPT6 LPT7 LPT8 LPT9 NUL PRN ] windows_special.each do |special| @parser.response = { 'content-disposition' => "attachment; filename=#{special}" } assert_equal "_#{special}", @parser.extract_filename end end def test_extract_filename_host @parser.response = {} @parser.uri = URI 'http://example' assert_equal 'example/index.html', @parser.extract_filename(true) end def test_extract_filename_special_character @parser.response = {} invisible = "\t\n\v\f\r" invisible.chars.each do |char| begin @parser.uri = URI "http://example/#{char}" assert_equal 'index.html', @parser.extract_filename, char.inspect rescue URI::InvalidURIError # ignore end end escaped = "<>\"\\|" escaped.chars.each do |char| escaped_char = CGI.escape char @parser.uri = URI "http://example/#{escaped_char}" assert_equal "#{escaped_char}.html", @parser.extract_filename, char end @parser.uri = URI "http://example/?" 
assert_equal 'index.html_', @parser.extract_filename, 'empty query' @parser.uri = URI "http://example/:" assert_equal '_.html', @parser.extract_filename, 'colon' @parser.uri = URI "http://example/*" assert_equal '_.html', @parser.extract_filename, 'asterisk' end def test_extract_filename_uri @parser.response = {} @parser.uri = URI 'http://example/foo' assert_equal 'foo.html', @parser.extract_filename @parser.uri += '/foo.jpg' assert_equal 'foo.jpg', @parser.extract_filename end def test_extract_filename_uri_full_path @parser.response = {} @parser.uri = URI 'http://example/foo' assert_equal 'example/foo.html', @parser.extract_filename(true) @parser.uri += '/foo.jpg' assert_equal 'example/foo.jpg', @parser.extract_filename(true) end def test_extract_filename_uri_query @parser.response = {} @parser.uri = URI 'http://example/?id=5' assert_equal 'index.html_id=5', @parser.extract_filename @parser.uri += '/foo.html?id=5' assert_equal 'foo.html_id=5', @parser.extract_filename end def test_extract_filename_uri_slash @parser.response = {} @parser.uri = URI 'http://example/foo/' assert_equal 'example/foo/index.html', @parser.extract_filename(true) @parser.uri += '/foo///' assert_equal 'example/foo/index.html', @parser.extract_filename(true) end def test_extract_filename_windows_special @parser.uri = URI 'http://example' @parser.response = {} windows_special = %w[ AUX COM1 COM2 COM3 COM4 COM5 COM6 COM7 COM8 COM9 CON LPT1 LPT2 LPT3 LPT4 LPT5 LPT6 LPT7 LPT8 LPT9 NUL PRN ] windows_special.each do |special| @parser.uri += "/#{special}" assert_equal "_#{special}.html", @parser.extract_filename end end def test_fill_header @parser.fill_header 'a' => 'b' expected = { 'a' => 'b' } assert_equal expected, @parser.response end def test_fill_header_nil @parser.fill_header nil assert_empty @parser.response end end mechanize-2.7.5/test/test_mechanize_file_response.rb0000644000004100000410000000112512772546476022766 0ustar www-datawww-datarequire 'mechanize/test_case' class 
TestMechanizeFileResponse < Mechanize::TestCase def test_content_type Tempfile.open %w[pi .nothtml] do |tempfile| res = Mechanize::FileResponse.new tempfile.path assert_equal nil, res['content-type'] end Tempfile.open %w[pi .xhtml] do |tempfile| res = Mechanize::FileResponse.new tempfile.path assert_equal 'text/html', res['content-type'] end Tempfile.open %w[pi .html] do |tempfile| res = Mechanize::FileResponse.new tempfile.path assert_equal 'text/html', res['Content-Type'] end end end mechanize-2.7.5/test/test_mechanize_file_saver.rb0000644000004100000410000000052312772546476022251 0ustar www-datawww-datarequire 'mechanize/test_case' class TestMechanizeFileSaver < Mechanize::TestCase def setup super @uri = URI 'http://example' @io = StringIO.new 'hello world' end def test_initialize in_tmpdir do Mechanize::FileSaver.new @uri, nil, @io, 200 assert File.exist? 'example/index.html' end end end mechanize-2.7.5/test/test_mechanize_http_auth_store.rb0000644000004100000410000001247712772546476023361 0ustar www-datawww-data# coding: utf-8 require 'mechanize/test_case' class TestMechanizeHttpAuthStore < Mechanize::TestCase def setup super @store = Mechanize::HTTP::AuthStore.new @uri = URI.parse 'http://example/' end def test_add_auth @store.add_auth @uri + '/path', 'user', 'pass' expected = { @uri => { nil => ['user', 'pass', nil], } } assert_equal expected, @store.auth_accounts end def test_add_auth_domain @store.add_auth @uri + '/path', 'user1', 'pass', nil, 'domain' expected = { @uri => { nil => %w[user1 pass domain], } } assert_equal expected, @store.auth_accounts e = assert_raises ArgumentError do @store.add_auth @uri, 'user3', 'pass', 'realm', 'domain' end assert_equal 'NTLM domain given with realm which NTLM does not use', e.message end def test_add_auth_realm @store.add_auth @uri, 'user1', 'pass' @store.add_auth @uri, 'user2', 'pass', 'realm' expected = { @uri => { nil => ['user1', 'pass', nil], 'realm' => ['user2', 'pass', nil], } } assert_equal expected, 
@store.auth_accounts end def test_add_auth_realm_case @store.add_auth @uri, 'user1', 'pass', 'realm' @store.add_auth @uri, 'user2', 'pass', 'Realm' expected = { @uri => { 'realm' => ['user1', 'pass', nil], 'Realm' => ['user2', 'pass', nil], } } assert_equal expected, @store.auth_accounts end def test_add_auth_string @store.add_auth "#{@uri}/path", 'user', 'pass' expected = { @uri => { nil => ['user', 'pass', nil], } } assert_equal expected, @store.auth_accounts end def test_add_default_auth _, err = capture_io do @store.add_default_auth 'user', 'pass' end expected = ['user', 'pass', nil] assert_equal expected, @store.default_auth assert_match 'DISCLOSURE WITHOUT YOUR KNOWLEDGE', err capture_io do @store.add_default_auth 'user', 'pass', 'realm' end expected = %w[user pass realm] assert_equal expected, @store.default_auth end def test_credentials_eh challenges = [ Mechanize::HTTP::AuthChallenge.new('Basic', 'realm' => 'r'), Mechanize::HTTP::AuthChallenge.new('Digest', 'realm' => 'r'), ] refute @store.credentials? @uri, challenges @store.add_auth @uri, 'user', 'pass' assert @store.credentials? @uri, challenges assert @store.credentials? 
"#{@uri}/path", challenges end def test_credentials_for assert_nil @store.credentials_for(@uri, 'realm') @store.add_auth @uri, 'user', 'pass', 'realm' assert_equal ['user', 'pass', nil], @store.credentials_for(@uri, 'realm') assert_equal ['user', 'pass', nil], @store.credentials_for(@uri.to_s, 'realm') assert_nil @store.credentials_for(@uri, 'other') end def test_credentials_for_default assert_nil @store.credentials_for(@uri, 'realm') capture_io do @store.add_default_auth 'user1', 'pass' end assert_equal ['user1', 'pass', nil], @store.credentials_for(@uri, 'realm') @store.add_auth @uri, 'user2', 'pass' assert_equal ['user2', 'pass', nil], @store.credentials_for(@uri, 'realm') assert_equal ['user2', 'pass', nil], @store.credentials_for(@uri, 'other') end def test_credentials_for_no_realm @store.add_auth @uri, 'user', 'pass' # no realm set assert_equal ['user', 'pass', nil], @store.credentials_for(@uri, 'realm') end def test_credentials_for_realm @store.add_auth @uri, 'user1', 'pass' @store.add_auth @uri, 'user2', 'pass', 'realm' assert_equal ['user2', 'pass', nil], @store.credentials_for(@uri, 'realm') assert_equal ['user1', 'pass', nil], @store.credentials_for(@uri, 'other') end def test_credentials_for_realm_case @store.add_auth @uri, 'user1', 'pass', 'realm' @store.add_auth @uri, 'user2', 'pass', 'Realm' assert_equal ['user1', 'pass', nil], @store.credentials_for(@uri, 'realm') assert_equal ['user2', 'pass', nil], @store.credentials_for(@uri, 'Realm') end def test_credentials_for_path @store.add_auth @uri, 'user', 'pass', 'realm' uri = @uri + '/path' assert_equal ['user', 'pass', nil], @store.credentials_for(uri, 'realm') end def test_remove_auth @store.remove_auth @uri assert_empty @store.auth_accounts end def test_remove_auth_both @store.add_auth @uri, 'user1', 'pass' @store.add_auth @uri, 'user2', 'pass', 'realm' uri = @uri + '/path' @store.remove_auth uri assert_empty @store.auth_accounts end def test_remove_auth_realm @store.add_auth @uri, 'user1', 'pass' 
@store.add_auth @uri, 'user2', 'pass', 'realm' @store.remove_auth @uri, 'realm' expected = { @uri => { nil => ['user1', 'pass', nil] } } assert_equal expected, @store.auth_accounts end def test_remove_auth_realm_case @store.add_auth @uri, 'user1', 'pass', 'realm' @store.add_auth @uri, 'user2', 'pass', 'Realm' @store.remove_auth @uri, 'Realm' expected = { @uri => { 'realm' => ['user1', 'pass', nil] } } assert_equal expected, @store.auth_accounts end def test_remove_auth_string @store.add_auth @uri, 'user1', 'pass' @store.remove_auth "#{@uri}/path" assert_empty @store.auth_accounts end end mechanize-2.7.5/test/test_mechanize_link.rb0000644000004100000410000001011712772546476021067 0ustar www-datawww-datarequire 'mechanize/test_case' class TestMechanizeLink < Mechanize::TestCase def test_click page = @mech.get("http://localhost/frame_test.html") link = page.link_with(:text => "Form Test") assert_equal('Form Test', link.text) page = link.click assert_equal("http://localhost/form_test.html", @mech.history.last.uri.to_s) end unless RUBY_ENGINE == 'jruby' # NekoHTML does not parse body of NOFRAMES def test_click_bang page = @mech.get("http://localhost/frame_test.html") link = page.link_with!(:text => "Form Test") assert_equal('Form Test', link.text) page = link.click assert_equal("http://localhost/form_test.html", @mech.history.last.uri.to_s) end unless RUBY_ENGINE == 'jruby' # NekoHTML does not parse body of NOFRAMES def test_click_base page = @mech.get("http://google.com/tc_base_link.html") page = page.links.first.click assert @mech.visited?("http://localhost/index.html") end def test_click_unsupported_scheme page = @mech.get("http://google.com/tc_links.html") link = page.link_with(:text => 'javascript link') assert_raises Mechanize::UnsupportedSchemeError do begin link.click rescue Mechanize::UnsupportedSchemeError => error assert_equal 'javascript', error.scheme assert_equal "javascript:new_page('1')", error.uri.to_s raise end end @mech.scheme_handlers['javascript'] = 
lambda { |my_link, my_page| URI.parse('http://localhost/tc_links.html') } link.click # HACK no assertion end def test_click_unexiting_link page = @mech.get("http://google.com/tc_links.html") assert_raises NoMethodError do page.link_with(:text => 'no link').click end begin page.link_with!(:text => 'no link').click rescue => e assert_instance_of Mechanize::ElementNotFoundError, e assert_kind_of Mechanize::Page, e.source assert_equal :link, e.element assert_kind_of Hash, e.conditions assert_equal 'no link', e.conditions[:text] end end def test_click_empty_href page = @mech.get("http://google.com/tc_links.html?q=test#anchor") link = page.link_with(:text => 'empty href') new_page = link.click assert_equal "http://google.com/tc_links.html?q=test", new_page.uri.to_s end def test_text_alt_text page = @mech.get("http://localhost/alt_text.html") assert_equal(5, page.links.length) assert_equal(1, page.meta_refresh.length) assert_equal '', page.meta_refresh.first.text assert_equal 'alt text', page.link_with(:href => 'alt_text.html').text assert_equal '', page.link_with(:href => 'no_alt_text.html').text assert_equal 'no image', page.link_with(:href => 'no_image.html').text assert_equal '', page.link_with(:href => 'no_text.html').text assert_equal '', page.link_with(:href => 'nil_alt_text.html').text end def test_uri_escaped doc = Nokogiri::HTML::Document.new node = Nokogiri::XML::Node.new('foo', doc) node['href'] = 'http://foo.bar/%20baz' link = Mechanize::Page::Link.new(node, nil, nil) assert_equal 'http://foo.bar/%20baz', link.uri.to_s end def test_uri_no_path page = @mech.get("http://localhost/relative/tc_relative_links.html") page = page.link_with(:text => 'just the query string').click assert_equal('http://localhost/relative/tc_relative_links.html?a=b', page.uri.to_s) end unless RUBY_ENGINE == 'jruby' # NekoHTML does not parse IFRAME def test_uri_weird doc = Nokogiri::HTML::Document.new node = Nokogiri::XML::Node.new('foo', doc) node['href'] = 'http://foo.bar/ baz' link = 
Mechanize::Page::Link.new(node, nil, nil) assert_equal 'http://foo.bar/%20baz', link.uri.to_s end def test_resolving_full_uri page = @mech.get("http://localhost/frame_test.html") link = page.link_with(:text => "Form Test") assert_equal "/form_test.html", link.uri.to_s assert_equal "http://localhost/form_test.html", link.resolved_uri.to_s end unless RUBY_ENGINE == 'jruby' # NekoHTML does not parse body of NOFRAMES end mechanize-2.7.5/test/test_mechanize_form.rb0000644000004100000410000007640112772546476021105 0ustar www-datawww-datarequire 'mechanize/test_case' class TestMechanizeForm < Mechanize::TestCase def setup super @uri = URI 'http://example' @page = page @uri @form = Mechanize::Form.new node('form', 'name' => @NAME), @mech, @page end def test_action form = Mechanize::Form.new node('form', 'action' => '?a=b&b=c') assert_equal '?a=b&b=c', form.action end def test_add_button_to_query button = Mechanize::Form::Button.new node('input', 'type' => 'submit') e = assert_raises ArgumentError do @form.add_button_to_query button end assert_equal "#{button.inspect} does not belong to the same page " \ "as the form \"#{@NAME}\" in #{@uri}", e.message end def test_aset assert_empty @form.keys @form['intarweb'] = 'Aaron' assert_equal 'Aaron', @form['intarweb'] end def test_aset_exists page = html_page <<-BODY Page Title
    BODY form = page.form_with(:name => 'post_form') assert_equal %w[first first], form.keys form['first'] = 'Aaron' assert_equal 'Aaron', form['first'] assert_equal ['Aaron', ''], form.values end def test_build_query_blank_form page = @mech.get('http://localhost/tc_blank_form.html') form = page.forms.first query = form.build_query assert(query.length > 0) assert query.all? { |x| x[1] == '' } end def test_build_query_radio_button_duplicate html = Nokogiri::HTML <<-HTML
    HTML form = Mechanize::Form.new html.at('form'), @mech, @page query = form.build_query assert_equal [%w[name a]], query end def test_build_query_radio_button_multiple_checked html = Nokogiri::HTML <<-HTML
    HTML form = Mechanize::Form.new html.at('form'), @mech, @page e = assert_raises Mechanize::Error do form.build_query end assert_equal 'radiobuttons "a, b" are checked in the "name" group, ' \ 'only one is allowed', e.message end def test_method_missing_get page = html_page <<-BODY
    BODY form = page.forms.first assert_equal 'some value', form.not_a_method end def test_method_missing_set page = html_page <<-BODY
    BODY form = page.forms.first form.not_a_method = 'some value' assert_equal [%w[not_a_method some\ value]], form.build_query end def test_parse_buttons page = html_page <<-BODY