ferret-0.11.8.6/0000755000004100000410000000000012476264460013277 5ustar www-datawww-dataferret-0.11.8.6/Rakefile0000755000004100000410000003256312476264460014760 0ustar www-datawww-datarequire 'rake' require 'rake/clean' require 'rubygems/package_task' require 'rdoc/task' require 'rake/testtask' $:. << 'lib' require 'ferret/version' def say(msg='') STDERR.puts msg end def prompt(msg) STDERR.print "#{msg} [Yna]: " while true case STDIN.gets.chomp! when /^(y(es)?)?$/i then return true when /^no?$/i then return false when /^a(bort)?$/i then fail('aborted') else STDERR.print "Sorry, I don't understand. Please type y, n or a: " end end end windows = (RUBY_PLATFORM =~ /win32|cygwin/) rescue nil SUDO = windows ? "" : "sudo " task :default => 'test:unit' #task :default => :build do # sh "ruby test/unit/index/tc_index.rb" #end BZLIB_SRC = FileList["../c/lib/bzlib/*.h"] + FileList["../c/lib/bzlib/*.c"].map do |fn| fn.gsub(%r{/([^/]*.c)}, '/BZ_\1') end ############################################################################## # Building ############################################################################## task :build => 'build:compile' namespace :build do EXT = "ferret_ext.so" # Note: libstemmer.[h] is necessary so that the file isn't included when it # doesn't exist. It needs to have one regular expression element. EXT_SRC = FileList["../c/src/*.[ch]", "../c/include/*.h", "../c/lib/bzlib/*.[ch]", "../c/lib/libstemmer_c/src_c/*.[ch]", "../c/lib/libstemmer_c/runtime/*.[ch]", "../c/lib/libstemmer_c/libstemmer/*.[ch]", "../c/lib/libstemmer_c/include/libstemmer.[h]"] EXT_SRC.exclude('../c/**/ind.[ch]', '../c/**/symbol.[ch]', '../c/include/threading.h', '../c/include/scanner.h', '../c/include/internal.h', '../c/src/lang.c', '../c/include/lang.h') EXT_SRC_MAP = {} EXT_SRC_DEST = EXT_SRC.map do |fn| ext_fn = File.join("ext", File.basename(fn)) if fn =~ /.c$/ and fn =~ /(bzlib|stemmer)/ prefix = $1.upcase ext_fn.gsub!(/ext\//, "ext/#{prefix}_") end EXT_SRC_MAP[fn] = ext_fn end SRC = FileList["ext/*.[ch]", EXT_SRC_DEST, 'ext/internal.h'].uniq CLEAN.include ['**/*.o', '**/*.obj', '.config', 'ext/cferret.c'] CLOBBER.include ['doc/api', 'ext/*.so', 'ext/Makefile', 'ext/internal.h', EXT_SRC_DEST] # The following block creates file tasks for all of the c files. They # belong in the ../c directory in source the working copy and they need # to be linked to in the ext directory EXT_SRC.each do |fn| dest_fn = EXT_SRC_MAP[fn] # prepend lib files to avoid conflicts file dest_fn => fn do |t| ln_sf File.expand_path(fn), File.expand_path(dest_fn) if fn =~ /stemmer/ # flatten the directory structure for lib_stemmer open(dest_fn) do |in_f| open(dest_fn + ".out", "w") do |out_f| in_f.each do |line| out_f.write(line.sub(/(#include ["<])[.a-z_\/]*\//, '\1')) end end end mv dest_fn + ".out", dest_fn end end end if File.exists?("../c") file 'ext/internal.h' => '../c/include/internal.h' do File.open('ext/internal.h', 'w') do |f| File.readlines('../c/include/internal.h').each do |l| next if l =~ /ALLOC/ and l !~ /ZERO|MP_/ f.puts(l) end end end desc "Build the extension (ferret_ext.so). You'll need a C compiler and Make." task :compile => ["ext/#{EXT}"] + SRC file "ext/#{EXT}" => "ext/Makefile" do cd "ext" if windows and ENV['make'].nil? begin sh "nmake" rescue Exception => e path = ':\Program Files\Microsoft Visual Studio\VC98\Bin\VCVARS32.BAT' if File.exists? "f#{path}" sh "f#{path}" elsif File.exists? "c#{path}" sh "c#{path}" else say say "***************************************************************" say "You need to have Visual C++ 6 to build Ferret on Windows." say "If you have it installed, you may need to run;" say ' C:\Program Files\Microsoft Visual Studio\VC98\Bin\VCVARS32.BAT' say "***************************************************************" say raise e end sh "nmake" end else sh "make" end cd ".." end file "ext/Makefile" => SRC do cd "ext" ruby "extconf.rb" cd ".." end end ############################################################################## # Testing ############################################################################## task :test => 'test:units' namespace :test do desc "Run tests with Valgrind" task :valgrind do sh "valgrind --suppressions=ferret_valgrind.supp " + "--leak-check=yes --show-reachable=yes " + "-v ruby test/unit/index/tc_index_reader.rb" end desc "Run all tests" task :all => [ :units ] desc "run unit tests in test/unit" Rake::TestTask.new("units" => :build) do |t| t.libs << "test/unit" t.pattern = 'test/unit/t[cs]_*.rb' t.verbose = true end task :unit => :units desc "run tests using locally installed gem" Rake::TestTask.new("installed") do |t| t.libs << "test/unit" t.ruby_opts << '-rtest/test_installed' t.pattern = 'test/unit/t[cs]_*.rb' t.verbose = true end end ############################################################################## # Documentation ############################################################################## desc "Generate API documentation" task :doc => 'doc:rdoc' namespace :doc do # if allison = Gem.cache.find_name('allison').last # allison_template = File.join(allison.full_gem_path, 'lib/allison.rb') # end desc "Generate documentation for the application" $rd = Rake::RDocTask.new do |rdoc| rdoc.rdoc_dir = 'doc/api' rdoc.title = "Ferret Search Library Documentation" rdoc.options << '--line-numbers' rdoc.options << '--inline-source' rdoc.options << '--charset=utf-8' # rdoc.template = allison_template if allison_template rdoc.rdoc_files.include('README') rdoc.rdoc_files.include('TODO') rdoc.rdoc_files.include('TUTORIAL') rdoc.rdoc_files.include('MIT-LICENSE') rdoc.rdoc_files.include('lib/**/*.rb') rdoc.rdoc_files.include('ext/r_*.c') rdoc.rdoc_files.include('ext/ferret.c') end desc "Look for TODO and FIXME tags in the code" task :todo do FileList['**/*.rb', 'ext/*.[ch]'].egrep /[#*].*(FIXME|TODO|TBD)/i end end ############################################################################## # Packaging and Installing ############################################################################## PKG_FILES = FileList[ 'setup.rb', '[-A-Z]*', 'lib/**/*.rb', 'lib/**/*.rhtml', 'lib/**/*.css', 'lib/**/*.js', 'test/**/*.rb', 'test/**/wordfile', 'rake_utils/**/*.rb', 'Rakefile', SRC ] spec = Gem::Specification.new do |s| #### Basic information. s.name = 'ferret' s.version = Ferret::VERSION s.summary = "Ruby indexing library." s.description = "Ferret is a super fast, highly configurable search library." #### Dependencies and requirements. s.add_development_dependency('rake') s.files = PKG_FILES.to_a s.extensions << "ext/extconf.rb" s.require_path = 'lib' s.bindir = 'bin' s.executables = ['ferret-browser'] s.default_executable = 'ferret-browser' #### Author and project details. s.author = "David Balmain" s.email = "dbalmain@gmail.com" #s.homepage = "http://ferret.davebalmain.com/trac" s.homepage = "http://github.com/jkraemer/ferret" s.rubyforge_project = "ferret" s.has_rdoc = true s.extra_rdoc_files = $rd.rdoc_files.reject { |fn| fn =~ /\.rb$/ }.to_a s.rdoc_options << '--title' << 'Ferret -- Ruby Search Library' << '--main' << 'README' << '--line-numbers' << 'TUTORIAL' << 'TODO' key_file = File.expand_path('~/.gem/gem-private_key.pem') key_file = nil unless File.exists?(key_file) cert_file = File.expand_path('~/.gem/gem-public_cert.pem') cert_file = nil unless File.exists?(cert_file) if key_file and cert_file s.signing_key = key_file s.cert_chain = cert_file end if windows s.files = PKG_FILES.to_a + ["ext/#{EXT}"] s.extensions.clear s.platform = Gem::Platform::WIN32 else s.platform = Gem::Platform::RUBY end end package_task = Gem::PackageTask.new(spec) do |pkg| unless windows pkg.need_zip = true pkg.need_tar = true end end desc "Run :gem and install the resulting gem" task :install => :gem do sh "#{SUDO}gem install pkg/ferret-#{Ferret::VERSION}.gem --no-rdoc --no-ri -l" end desc "Run :clobber and uninstall the .gem" task :uninstall => :clobber do sh "#{SUDO}gem uninstall ferret" end desc "Same as :install but you must be rootgem" task :root_install => :gem do sh "gem install pkg/ferret-#{Ferret::VERSION}.gem --no-rdoc --no-ri -l" end desc "Same as :uninstall but you must be root" task :root_uninstall => :clobber do sh "gem uninstall ferret" end def list_changes_since_last_release tag_listing = `svn list svn://davebalmain.com/ferret/tags` last_tag = tag_listing.split("\n").last log = `svn log --stop-on-copy svn://davebalmain.com/ferret/tags/#{last_tag}` first_log = log.split(/-------+/)[-2] last_revision = /^r(\d+)\s+\|/.match(first_log)[1] `svn log .. -rHEAD:#{last_revision}` end desc "List changes since last release" task :changes do puts list_changes_since_last_release end if ENV['FERRET_DEV'] ############################################################################## # Releasing ############################################################################## desc "Generate and upload a new release" task :release => 'release:release' namespace :release do task :release => [:status_check, 'test:all', :package, :tag] do say say "**************************************************************" say "* Release #{Ferret::VERSION} Complete." say "* Packages ready to upload." say "**************************************************************" say reversion("lib/ferret/version.rb") end # Validate that everything is ready to go for a release. task :status_check do # Are all source files checked in? unless `svn -q --ignore-externals status` =~ /^$/ fail "'svn -q status' is not clean ... do you have unchecked-in files?" end say "No outstanding checkins found ... OK" end def reversion(fn) new_version = nil begin print "Ferret is currently at #{Ferret::VERSION}. What version now? " new_version = STDIN.gets.chomp! end until prompt("Change to version #{new_version}?") if ENV['RELTEST'] say "Would change the version in lib/ferret/version.rb from" say " #{Ferret::VERSION} => #{new_version}" say "and then commit the changes with the command" say " svn ci -m \"Updated to version #{new_version}\" " + "lib/ferret/version.rb" else open(fn) do |ferret_in| open(fn + ".new", "w") do |ferret_out| ferret_in.each do |line| if line =~ /^ VERSION\s*=\s*/ ferret_out.puts " VERSION = '#{new_version}'" else ferret_out.puts line end end end end mv fn + ".new", fn sh %{svn ci -m "Updated to version #{new_version}" lib/ferret/version.rb} end end # Tag all the SVN files with the latest release number task :tag => :status_check do reltag = "REL-#{Ferret::VERSION}" say "Tagging SVN with [#{reltag}]" if ENV['RELTEST'] say "Release Task Testing, skipping SVN tagging. Would do;" say %{svn copy -m "creating release #{reltag}" svn://www.davebalmain.com/ferret/trunk svn://www.davebalmain.com/ferret/tags/#{reltag}} else sh %{svn copy -m "creating release #{reltag}" svn://www.davebalmain.com/ferret/trunk svn://www.davebalmain.com/ferret/tags/#{reltag}} end end end ############################################################################## # Publishing ############################################################################## namespace :publish do PUBLISH_PROMPT = <<-EOF Make sure you updated RELEASE_NOTES and RELEASE_CHANGES and that the package exists. Are you sure you want to continue? EOF desc "Publish gem on rubyforge for download. Will only do the linux version" task :release do exit unless prompt(PUBLISH_PROMPT) require 'rubyforge' require 'rake/contrib/rubyforgepublisher' version = Ferret::VERSION packages = %w(gem tgz zip).map {|ext| "pkg/ferret-#{version}.#{ext}"} rubyforge = RubyForge.new rubyforge.login rubyforge.add_release('ferret', 'ferret', "ferret-#{version}", *packages) end desc "Publish the documentation" task :docs => 'doc:rdoc' do sh %{rsync -rzv --delete -e 'ssh -p 8900' doc/api/ davebalmain.com:/var/www/ferret/api} end desc "Publish the documentation and release" task :all => [:doc, :release] end end # # In case I ever need to add another racc parser, here's how # # # Make Parsers --------------------------------------------------------------- # # RACC_SRC = FileList["lib/**/*.y"] # # task :parsers => RACC_OUT # rule(/\.tab\.rb$/ => [proc {|tn| tn.sub(/\.tab\.rb$/, '.y')}]) do |t| # sh "racc #{t.source}" # end ferret-0.11.8.6/CHANGELOG0000644000004100000410000000224112476264460014510 0ustar www-datawww-dataThu Mar 08 00:18:44 +0100 2012 * Ruby 1.9 compatibility * This release is basically the same as the now obsolete jk-ferret 0.11.8.3 Fri Oct 20 22:25:37 JST 2006 * Added Filter#bits method to built-in Filters. * Added MappingFilter < TokenFilter that can be used to map strings to other strings during analysis. A possible use of this is it to Filter utf-8 characters to ascii characters. Fri Oct 13 09:18:31 JST 2006 * Changed documentation to state truthfully that FULL_ENGLISH_STOP_WORDS is being used by default in StandardAnalyzer and StopwordFilter. * Removed 'will', 's' and 't' from ENGLISH_STOP_WORDS so that all words in ENGLISH_STOP_WORDS can be found in FULL_ENGLISH_STOP_WORDS, that is ENGLISH_STOP_WORDS is a subset of FULL_ENGLISH_STOP_WORDS. Thu Oct 12 23:04:19 JST 2006 * Fixed adding SortField to Sort object in Ruby. Garbage collection wasn't working. * Can now set :sort => SortField#new Tue Oct 10 14:42:17 JST 2006 * Fixed MultiTermDocEnum bug introduced in version 0.10.10 during performance enhancements. * Added Filter#bits(index_reader) method to C implemented filters so that they can be used in Ruby. ferret-0.11.8.6/bin/0000755000004100000410000000000012476264460014047 5ustar www-datawww-dataferret-0.11.8.6/bin/ferret-browser0000755000004100000410000000373712476264460016757 0ustar www-datawww-data#!/usr/bin/env ruby $: << File.expand_path(File.join(File.basename(__FILE__), '../lib')) require 'ferret' require 'ferret/browser' require 'optparse' require 'ostruct' SERVER_OPTIONS = ['webrick'] conf = OpenStruct.new(:host => '0.0.0.0', :port => 3301) opts = OptionParser.new do |opts| opts.banner = "Usage: #{File.basename($0)} /path/to/index" opts.separator "" opts.separator "Specific Options:" opts.on("-h", "--host HOSTNAME", "Host for web server to bind to (default is all IPs)") { |host| conf.host = host } opts.on("-p", "--port NUM", "Port for web server (defaults to #{conf.port})") { |port| conf.port = port } opts.on("-s", "--server NAME", "Server to force (#{SERVER_OPTIONS.join(', ')}).") { |s| conf.server = s.to_sym } opts.separator "" opts.separator "Common options:" opts.on_tail("-?", "--help", "Show this message") do puts opts exit end opts.on_tail("-v", "--version", "Show version") do puts Ferret::VERSION exit end end opts.parse! ARGV if ARGV.length != 1 puts opts exit end @path = ARGV[0] # Load the Ferret index begin @reader = Ferret::Index::IndexReader.new(@path) rescue Ferret::FileNotFoundError => e puts "\033[31mCannot start Ferret. No index exists at \"\033[m" + "\033[33m#{@path}\033[m\033[31m\".\033[m" exit rescue Exception => e puts "\033[31mCannot start Ferret.\n\033[m\033[33m#{e.to_s}\031[m" exit end unless conf.server conf.server = :webrick end case conf.server.to_s when 'webrick' require 'webrick/httpserver' require 'ferret/browser/webrick' # Mount the root s = WEBrick::HTTPServer.new(:BindAddress => conf.host, :Port => conf.port) s.mount "/s", WEBrick::HTTPServlet::FileHandler, Ferret::Browser::Controller::STATIC_DIR, true s.mount "/", WEBrick::FerretBrowserHandler, @reader, @path # Server up trap(:INT) do s.shutdown end s.start else raise "server #{conf.server} not known. Must be one of [#{SERVER_OPTIONS.join(', ')}]" end ferret-0.11.8.6/MIT-LICENSE0000644000004100000410000000204612476264460014735 0ustar www-datawww-dataCopyright (c) 2005-2006 David Balmain Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ferret-0.11.8.6/README0000644000004100000410000000603412476264460014162 0ustar www-datawww-data= Ferret Ferret is a Ruby search library inspired by the Apache Lucene search engine for Java (http://jakarta.apache.org/lucene/). In the same way as Lucene, it is not a standalone application, but a library you can use to index documents and search for things in them later. == Requirements * Ruby 1.8 * C compiler to build the extension. Tested with gcc, VC6 * make (or nmake on windows) == Installation $ sudo gem install ferret If you don't have rubygems installed you can still install Ferret. Just download one of the zipped up versions of Ferret, unzip it and change into the unzipped directory. Then run the following set of commands; $ ruby setup.rb config $ ruby setup.rb setup $ sudo ruby setup.rb install == Usage You can read the TUTORIAL which you'll find in the same directory as this README. You can also check the following modules for more specific documentation. * Ferret::Analysis: for more information on how the data is processed when it is tokenized. There are a number of things you can do with your data such as adding stop lists or perhaps a porter stemmer. There are also a number of analyzers already available and it is almost trivial to create a new one with a simple regular expression. * Ferret::Search: for more information on querying the index. There are a number of already available queries and it's unlikely you'll need to create your own. You may however want to take advantage of the sorting or filtering abilities of Ferret to present your data the best way you see fit. * Ferret::Document: to find out how to create documents. This part of Ferret is relatively straightforward. If you know how Strings, Hashes and Arrays work Ferret then you'll be able to create Documents. * Ferret::QueryParser: if you want to find out more about what you can do with Ferret's Query Parser, this is the place to look. The query parser is one area that could use a bit of work so please send your suggestions. * Ferret::Index: for more advanced access to the index you'll probably want to use the Ferret::Index::IndexWriter and Ferret::Index::IndexReader. This is the place to look for more information on them. * Ferret::Store: This is the module used to access the actual index storage and won't be of much interest to most people. === Performance We are unaware of any alternatives that can out-perform Ferret while still matching it in features. == Contact For bug reports and patches I have set up Trac here; http://ferret.davebalmain.com/trac Queries, discussion etc should be addressed to the mailing lists here; http://rubyforge.org/projects/ferret/ Alternatively you could create a new page for discussion on the Ferret wiki; http://ferret.davebalmain.com/trac Of course, since Ferret was ported from Apache Lucene, most of what you can do with Lucene you can also do with Ferret. == Authors [David Balmain] Port to Ruby [The Apache Software Foundation (Doug Cutting and friends)] Original Apache Lucene == License Ferret is available under an MIT-style license. :include: MIT-LICENSE ferret-0.11.8.6/RELEASE_CHANGES0000644000004100000410000001514512476264460015440 0ustar www-datawww-data(in /home/dave/w/ferret/ruby) ------------------------------------------------------------------------ r830 | dbalmain | 2008-03-01 14:10:47 +1100 (Sat, 01 Mar 2008) | 1 line A few more updates to the build system ------------------------------------------------------------------------ r829 | dbalmain | 2008-03-01 13:55:16 +1100 (Sat, 01 Mar 2008) | 1 line A few more updates to the build system ------------------------------------------------------------------------ r828 | dbalmain | 2008-02-29 10:48:33 +1100 (Fri, 29 Feb 2008) | 1 line Moved largefile test to a new long_running test directory so that tests can be run more easily ------------------------------------------------------------------------ r826 | dbalmain | 2008-02-29 10:09:48 +1100 (Fri, 29 Feb 2008) | 1 line Updated references to ferret/version.rb. Updated Rakefile to allow release task testing ------------------------------------------------------------------------ r825 | dbalmain | 2008-02-29 10:02:27 +1100 (Fri, 29 Feb 2008) | 1 line Updated Rakefile to clean it up (added namespaces). ------------------------------------------------------------------------ r824 | dbalmain | 2008-02-29 10:01:08 +1100 (Fri, 29 Feb 2008) | 1 line Updated :filter_proc so that custom PostFilter extensions can be built and applied to searches. See ruby/examples/c_extensions/age_filter ------------------------------------------------------------------------ r823 | dbalmain | 2008-02-26 18:44:39 +1100 (Tue, 26 Feb 2008) | 1 line Fixed ticket #277. This will also help with the correct highlighting of matching urls in queries ------------------------------------------------------------------------ r822 | dbalmain | 2008-02-22 12:46:48 +1100 (Fri, 22 Feb 2008) | 1 line Updated to latest posh.h => http://poshlib.hookatooka.com/poshlib/ ------------------------------------------------------------------------ r821 | dbalmain | 2008-02-22 09:05:25 +1100 (Fri, 22 Feb 2008) | 1 line Added ruby bindings to MultiMapper for testing in Ruby ------------------------------------------------------------------------ r820 | dbalmain | 2008-02-09 14:30:55 +1100 (Sat, 09 Feb 2008) | 1 line Updated svn:ignore properties to handle new stemmer files ------------------------------------------------------------------------ r819 | dbalmain | 2008-02-09 14:27:46 +1100 (Sat, 09 Feb 2008) | 9 lines Fixed Ticket #337. StemFilter.new now works with 'English', :english or 'EnGlIsH' and 'UTF_8' or 'utf-8' etc. Also, good news for Norwegians, Romanians, Turks and Finns. We now have 3 new stemmers for Norwegian, Romanian and Turkish and 2 new stop-word lists for Finnish and Hungarian. Please try them out and let me know if there are any problems. ------------------------------------------------------------------------ r818 | dbalmain | 2008-02-09 13:20:37 +1100 (Sat, 09 Feb 2008) | 1 line Removed old version of stemmer. Making way for new version. This will break build temporarily ------------------------------------------------------------------------ r817 | dbalmain | 2008-02-09 11:55:02 +1100 (Sat, 09 Feb 2008) | 1 line Added a very useful group_by example. ------------------------------------------------------------------------ r816 | dbalmain | 2008-02-09 09:26:20 +1100 (Sat, 09 Feb 2008) | 1 line Updated documentation for :filter_proc to indicate that you can return a Float to be used to modify the score. ------------------------------------------------------------------------ r815 | dbalmain | 2008-02-09 00:27:58 +1100 (Sat, 09 Feb 2008) | 1 line Made TypedRangeQuery the default range query when used from Ferret::Index::Index ------------------------------------------------------------------------ r814 | dbalmain | 2008-02-08 23:16:55 +1100 (Fri, 08 Feb 2008) | 1 line Made the TypedRangeQuery optional in the query parser ------------------------------------------------------------------------ r813 | dbalmain | 2008-02-08 23:12:13 +1100 (Fri, 08 Feb 2008) | 1 line Added TypedRangeQuery and TypedRangeFilter to the ruby bindings ------------------------------------------------------------------------ r812 | dbalmain | 2008-02-08 22:19:31 +1100 (Fri, 08 Feb 2008) | 1 line Added TypedRangeQuery so that you can do range queries with unpadded numbers ------------------------------------------------------------------------ r811 | dbalmain | 2008-02-08 16:22:06 +1100 (Fri, 08 Feb 2008) | 3 lines Whoops, quick fix. Had unnecessarily nested locks. ------------------------------------------------------------------------ r810 | dbalmain | 2008-02-08 16:17:33 +1100 (Fri, 08 Feb 2008) | 1 line Added patch for Ticket #340 which adds batch updating and deleting. Made significant modifications from the patch. ------------------------------------------------------------------------ r809 | dbalmain | 2008-02-08 13:49:07 +1100 (Fri, 08 Feb 2008) | 18 lines Added score filter. This enables you to filter the results and modify the score to change the sort order. For example to modify the scoring so that a document with todays date gets twice the score factor as a document 50 days ago and four times the score factor of a document 100 days ago (ie a half life of 50 days) you would do this; fifty_day_half_life_filter = lambda do |doc, score, searcher| days = (Date.today() - Date.parse(searcher[doc][:date])).to_i 1.0 / (2.0 ** (days.to_f / 50.0)) end top_docs = @searcher.search(q, :filter_proc => fifty_day_half_life_filter) ------------------------------------------------------------------------ r808 | dbalmain | 2008-01-11 07:14:01 +1100 (Fri, 11 Jan 2008) | 1 line Changed unsigned long longs to f_u64 type to fix ticket #336 ------------------------------------------------------------------------ r807 | dbalmain | 2008-01-11 07:12:40 +1100 (Fri, 11 Jan 2008) | 1 line Changed unsigned long longs to f_u64 type to fix ticket #336 ------------------------------------------------------------------------ r806 | dbalmain | 2008-01-11 07:01:00 +1100 (Fri, 11 Jan 2008) | 1 line Minor comment correction ------------------------------------------------------------------------ r805 | dbalmain | 2007-12-12 10:28:23 +1100 (Wed, 12 Dec 2007) | 1 line Fixed Ticket #332. Added spaces so that code parses correctly. ------------------------------------------------------------------------ r804 | dbalmain | 2007-12-03 11:20:34 +1100 (Mon, 03 Dec 2007) | 1 line Added test for ticket #324 ------------------------------------------------------------------------ r803 | dbalmain | 2007-12-03 11:12:55 +1100 (Mon, 03 Dec 2007) | 1 line Fixed Ferret::Index::Index#query_update for ticket #324. Was only updating a maximum of 10 records. ------------------------------------------------------------------------ ferret-0.11.8.6/lib/0000755000004100000410000000000012476264460014045 5ustar www-datawww-dataferret-0.11.8.6/lib/ferret.rb0000644000004100000410000000250412476264460015662 0ustar www-datawww-data#-- # Copyright (c) 2005 David Balmain # # Permission is hereby granted, free of charge, to any person obtaining # a copy of this software and associated documentation files (the # "Software"), to deal in the Software without restriction, including # without limitation the rights to use, copy, modify, merge, publish, # distribute, sublicense, and/or sell copies of the Software, and to # permit persons to whom the Software is furnished to do so, subject to # the following conditions: # # The above copyright notice and this permission notice shall be # included in all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #++ # :include: ../TUTORIAL $: << File.expand_path(File.join(File.dirname(__FILE__), "../ext")) require 'ferret_ext' require 'ferret/version' require 'ferret/document' require 'ferret/index' require 'ferret/field_infos' require 'ferret/field_symbol' ferret-0.11.8.6/lib/ferret/0000755000004100000410000000000012476264460015334 5ustar www-datawww-dataferret-0.11.8.6/lib/ferret/index.rb0000644000004100000410000011150012476264460016766 0ustar www-datawww-datarequire 'monitor' module Ferret::Index # This is a simplified interface to the index. See the TUTORIAL for more # information on how to use this class. class Index include MonitorMixin include Ferret::Store include Ferret::Search attr_reader :options # If you create an Index without any options, it'll simply create an index # in memory. But this class is highly configurable and every option that # you can supply to IndexWriter and QueryParser, you can also set here. # Please look at the options for the constructors to these classes. # # === Options # # See; # # * QueryParser # * IndexWriter # # default_input_field:: Default: "id". This specifies the default field # that will be used when you add a simple string # to the index using #add_document or <<. # id_field:: Default: "id". This field is as the field to # search when doing searches on a term. For # example, if you do a lookup by term "cat", ie # index["cat"], this will be the field that is # searched. # key:: Default: nil. Expert: This should only be used # if you really know what you are doing. Basically # you can set a field or an array of fields to be # the key for the index. So if you add a document # with a same key as an existing document, the # existing document will be replaced by the new # object. Using a multiple field key will slow # down indexing so it should not be done if # performance is a concern. A single field key (or # id) should be find however. Also, you must make # sure that your key/keys are either untokenized # or that they are not broken up by the analyzer. # auto_flush:: Default: false. Set this option to true if you # want the index automatically flushed every time # you do a write (includes delete) to the index. # This is useful if you have multiple processes # accessing the index and you don't want lock # errors. Setting :auto_flush to true has a huge # performance impact so don't use it if you are # concerned about performance. In that case you # should think about setting up a DRb indexing # service. # lock_retry_time:: Default: 2 seconds. This parameter specifies how # long to wait before retrying to obtain the # commit lock when detecting if the IndexReader is # at the latest version. # close_dir:: Default: false. If you explicitly pass a # Directory object to this class and you want # Index to close it when it is closed itself then # set this to true. # use_typed_range_query:: Default: true. Use TypedRangeQuery instead of # the standard RangeQuery when parsing # range queries. This is useful if you have number # fields which you want to perform range queries # on. You won't need to pad or normalize the data # in the field in anyway to get correct results. # However, performance will be a lot slower for # large indexes, hence the default. # # == Examples # # index = Index::Index.new(:analyzer => WhiteSpaceAnalyzer.new()) # # index = Index::Index.new(:path => '/path/to/index', # :create_if_missing => false, # :auto_flush => true) # # index = Index::Index.new(:dir => directory, # :default_slop => 2, # :handle_parse_errors => false) # # You can also pass a block if you like. The index will be yielded and # closed at the index of the box. For example; # # Ferret::I.new() do |index| # # do stuff with index. Most of your actions will be cached. # end def initialize(options = {}, &block) super() if options[:key] @key = options[:key] if @key.is_a?(Array) @key.flatten.map {|k| k.to_s.intern} end else @key = nil end if (fi = options[:field_infos]).is_a?(String) options[:field_infos] = FieldInfos.load(fi) end @close_dir = options[:close_dir] if options[:dir].is_a?(String) options[:path] = options[:dir] end if options[:path] @close_dir = true begin @dir = FSDirectory.new(options[:path], options[:create]) rescue IOError => io @dir = FSDirectory.new(options[:path], options[:create_if_missing] != false) end elsif options[:dir] @dir = options[:dir] else options[:create] = true # this should always be true for a new RAMDir @close_dir = true @dir = RAMDirectory.new end @dir.extend(MonitorMixin) unless @dir.kind_of? MonitorMixin options[:dir] = @dir options[:lock_retry_time]||= 2 @options = options if (!@dir.exists?("segments")) || options[:create] IndexWriter.new(options).close end options[:analyzer]||= Ferret::Analysis::StandardAnalyzer.new if options[:use_typed_range_query].nil? options[:use_typed_range_query] = true end @searcher = nil @writer = nil @reader = nil @options.delete(:create) # only create the first time if at all @auto_flush = @options[:auto_flush] || false if (@options[:id_field].nil? and @key.is_a?(Symbol)) @id_field = @key else @id_field = @options[:id_field] || :id end @default_field = (@options[:default_field]||= :*) @default_input_field = options[:default_input_field] || @id_field if @default_input_field.respond_to?(:intern) @default_input_field = @default_input_field.intern end @open = true @qp = nil if block yield self self.close end end # Returns an array of strings with the matches highlighted. The +query+ can # either a query String or a Ferret::Search::Query object. The doc_id is # the id of the document you want to highlight (usually returned by the # search methods). There are also a number of options you can pass; # # === Options # # field:: Default: @options[:default_field]. The default_field # is the field that is usually highlighted but you can # specify which field you want to highlight here. If # you want to highlight multiple fields then you will # need to call this method multiple times. # excerpt_length:: Default: 150. Length of excerpt to show. Highlighted # terms will be in the centre of the excerpt. Set to # :all to highlight the entire field. # num_excerpts:: Default: 2. Number of excerpts to return. # pre_tag:: Default: "". Tag to place to the left of the # match. You'll probably want to change this to a # "" tag with a class. Try "\033[36m" for use in # a terminal. # post_tag:: Default: "". This tag should close the # +:pre_tag+. Try tag "\033[m" in the terminal. # ellipsis:: Default: "...". This is the string that is appended # at the beginning and end of excerpts (unless the # excerpt hits the start or end of the field. # Alternatively you may want to use the HTML entity # … or the UTF-8 string "\342\200\246". def highlight(query, doc_id, options = {}) @dir.synchronize do ensure_searcher_open() @searcher.highlight(do_process_query(query), doc_id, options[:field]||@options[:default_field], options) end end # Closes this index by closing its associated reader and writer objects. def close @dir.synchronize do if not @open raise(StandardError, "tried to close an already closed directory") end @searcher.close() if @searcher @reader.close() if @reader @writer.close() if @writer @dir.close() if @close_dir @open = false end end # Get the reader for this index. # NOTE:: This will close the writer from this index. def reader ensure_reader_open() return @reader end # Get the searcher for this index. # NOTE:: This will close the writer from this index. def searcher ensure_searcher_open() return @searcher end # Get the writer for this index. # NOTE:: This will close the reader from this index. def writer ensure_writer_open() return @writer end # Adds a document to this index, using the provided analyzer instead of # the local analyzer if provided. If the document contains more than # IndexWriter::MAX_FIELD_LENGTH terms for a given field, the remainder are # discarded. # # There are three ways to add a document to the index. # To add a document you can simply add a string or an array of strings. # This will store all the strings in the "" (ie empty string) field # (unless you specify the default_field when you create the index). # # index << "This is a new document to be indexed" # index << ["And here", "is another", "new document", "to be indexed"] # # But these are pretty simple documents. If this is all you want to index # you could probably just use SimpleSearch. So let's give our documents # some fields; # # index << {:title => "Programming Ruby", :content => "blah blah blah"} # index << {:title => "Programming Ruby", :content => "yada yada yada"} # # Or if you are indexing data stored in a database, you'll probably want # to store the id; # # index << {:id => row.id, :title => row.title, :date => row.date} # # See FieldInfos for more information on how to set field properties. def add_document(doc, analyzer = nil) @dir.synchronize do ensure_writer_open() if doc.is_a?(String) or doc.is_a?(Array) doc = {@default_input_field => doc} end # delete existing documents with the same key if @key if @key.is_a?(Array) query = @key.inject(BooleanQuery.new()) do |bq, field| bq.add_query(TermQuery.new(field, doc[field].to_s), :must) bq end query_delete(query) else id = doc[@key].to_s if id @writer.delete(@key, id) end end end ensure_writer_open() if analyzer old_analyzer = @writer.analyzer @writer.analyzer = analyzer @writer.add_document(doc) @writer.analyzer = old_analyzer else @writer.add_document(doc) end flush() if @auto_flush end end alias :<< :add_document # Run a query through the Searcher on the index. A TopDocs object is # returned with the relevant results. The +query+ is a built in Query # object or a query string that can be parsed by the Ferret::QueryParser. # Here are the options; # # === Options # # offset:: Default: 0. The offset of the start of the section of the # result-set to return. This is used for paging through # results. Let's say you have a page size of 10. If you # don't find the result you want among the first 10 results # then set +:offset+ to 10 and look at the next 10 results, # then 20 and so on. # limit:: Default: 10. This is the number of results you want # returned, also called the page size. Set +:limit+ to # +:all+ to return all results # sort:: A Sort object or sort string describing how the field # should be sorted. A sort string is made up of field names # which cannot contain spaces and the word "DESC" if you # want the field reversed, all separated by commas. For # example; "rating DESC, author, title". Note that Ferret # will try to determine a field's type by looking at the # first term in the index and seeing if it can be parsed as # an integer or a float. Keep this in mind as you may need # to specify a fields type to sort it correctly. For more # on this, see the documentation for SortField # filter:: a Filter object to filter the search results with # filter_proc:: a filter Proc is a Proc which takes the doc_id, the score # and the Searcher object as its parameters and returns a # Boolean value specifying whether the result should be # included in the result set. def search(query, options = {}) @dir.synchronize do return do_search(query, options) end end # Run a query through the Searcher on the index. A TopDocs object is # returned with the relevant results. The +query+ is a Query object or a # query string that can be validly parsed by the Ferret::QueryParser. The # Searcher#search_each method yields the internal document id (used to # reference documents in the Searcher object like this; # +searcher[doc_id]+) and the search score for that document. It is # possible for the score to be greater than 1.0 for some queries and # taking boosts into account. This method will also normalize scores to # the range 0.0..1.0 when the max-score is greater than 1.0. Here are the # options; # # === Options # # offset:: Default: 0. The offset of the start of the section of the # result-set to return. This is used for paging through # results. Let's say you have a page size of 10. If you # don't find the result you want among the first 10 results # then set +:offset+ to 10 and look at the next 10 results, # then 20 and so on. # limit:: Default: 10. This is the number of results you want # returned, also called the page size. Set +:limit+ to # +:all+ to return all results # sort:: A Sort object or sort string describing how the field # should be sorted. A sort string is made up of field names # which cannot contain spaces and the word "DESC" if you # want the field reversed, all separated by commas. For # example; "rating DESC, author, title". Note that Ferret # will try to determine a field's type by looking at the # first term in the index and seeing if it can be parsed as # an integer or a float. Keep this in mind as you may need # to specify a fields type to sort it correctly. For more # on this, see the documentation for SortField # filter:: a Filter object to filter the search results with # filter_proc:: a filter Proc is a Proc which takes the doc_id, the score # and the Searcher object as its parameters and returns a # Boolean value specifying whether the result should be # included in the result set. # # returns:: The total number of hits. # # === Example # eg. # index.search_each(query, options = {}) do |doc, score| # puts "hit document number #{doc} with a score of #{score}" # end # def search_each(query, options = {}) # :yield: doc, score @dir.synchronize do ensure_searcher_open() query = do_process_query(query) @searcher.search_each(query, options) do |doc, score| yield doc, score end end end # Run a query through the Searcher on the index, ignoring scoring and # starting at +:start_doc+ and stopping when +:limit+ matches have been # found. It returns an array of the matching document numbers. # # There is a big performance advange when using this search method on a # very large index when there are potentially thousands of matching # documents and you only want say 50 of them. The other search methods need # to look at every single match to decide which one has the highest score. # This search method just needs to find +:limit+ number of matches before # it returns. # # === Options # # start_doc:: Default: 0. The start document to start the search from. # NOTE very carefully that this is not the same as the # +:offset+ parameter used in the other search methods # which refers to the offset in the result-set. This is the # document to start the scan from. So if you scanning # through the index in increments of 50 documents at a time # you need to use the last matched doc in the previous # search to start your next search. See the example below. # limit:: Default: 50. This is the number of results you want # returned, also called the page size. Set +:limit+ to # +:all+ to return all results. # TODO: add option to return loaded documents instead # # === Options # # start_doc = 0 # begin # results = @searcher.scan(query, :start_doc => start_doc) # yield results # or do something with them # start_doc = results.last # # start_doc will be nil now if results is empty, ie no more matches # end while start_doc def scan(query, options = {}) @dir.synchronize do ensure_searcher_open() query = do_process_query(query) @searcher.scan(query, options) end end # Retrieves a document/documents from the index. The method for retrieval # depends on the type of the argument passed. # # If +arg+ is an Integer then return the document based on the internal # document number. # # If +arg+ is a Range, then return the documents within the range based on # internal document number. # # If +arg+ is a String then search for the first document with +arg+ in # the +id+ field. The +id+ field is either :id or whatever you set # +:id_field+ parameter to when you create the Index object. def doc(*arg) @dir.synchronize do id = arg[0] if id.kind_of?(String) or id.kind_of?(Symbol) ensure_reader_open() term_doc_enum = @reader.term_docs_for(@id_field, id.to_s) return term_doc_enum.next? ? @reader[term_doc_enum.doc] : nil else ensure_reader_open(false) return @reader[*arg] end end end alias :[] :doc # Retrieves the term_vector for a document. The document can be referenced # by either a string id to match the id field or an integer corresponding # to Ferret's document number. # # See Ferret::Index::IndexReader#term_vector def term_vector(id, field) @dir.synchronize do ensure_reader_open() if id.kind_of?(String) or id.kind_of?(Symbol) term_doc_enum = @reader.term_docs_for(@id_field, id.to_s) if term_doc_enum.next? id = term_doc_enum.doc else return nil end end return @reader.term_vector(id, field) end end # iterate through all documents in the index. This method preloads the # documents so you don't need to call #load on the document to load all the # fields. def each @dir.synchronize do ensure_reader_open (0...@reader.max_doc).each do |i| yield @reader[i].load unless @reader.deleted?(i) end end end # Deletes a document/documents from the index. The method for determining # the document to delete depends on the type of the argument passed. # # If +arg+ is an Integer then delete the document based on the internal # document number. Will raise an error if the document does not exist. # # If +arg+ is a String then search for the documents with +arg+ in the # +id+ field. The +id+ field is either :id or whatever you set +:id_field+ # parameter to when you create the Index object. Will fail quietly if the # no document exists. # # If +arg+ is a Hash or an Array then a batch delete will be performed. # If +arg+ is an Array then it will be considered an array of +id+'s. If # it is a Hash, then its keys will be used instead as the Array of # document +id+'s. If the +id+ is an Integer then it is considered a # Ferret document number and the corresponding document will be deleted. # If the +id+ is a String or a Symbol then the +id+ will be considered a # term and the documents that contain that term in the +:id_field+ will be # deleted. def delete(arg) @dir.synchronize do if arg.is_a?(String) or arg.is_a?(Symbol) ensure_writer_open() @writer.delete(@id_field, arg.to_s) elsif arg.is_a?(Integer) ensure_reader_open() cnt = @reader.delete(arg) elsif arg.is_a?(Hash) or arg.is_a?(Array) batch_delete(arg) else raise ArgumentError, "Cannot delete for arg of type #{arg.class}" end flush() if @auto_flush end return self end # Delete all documents returned by the query. # # query:: The query to find documents you wish to delete. Can either be a # string (in which case it is parsed by the standard query parser) # or an actual query object. def query_delete(query) @dir.synchronize do ensure_writer_open() ensure_searcher_open() query = do_process_query(query) @searcher.search_each(query, :limit => :all) do |doc, score| @reader.delete(doc) end flush() if @auto_flush end end # Returns true if document +n+ has been deleted def deleted?(n) @dir.synchronize do ensure_reader_open() return @reader.deleted?(n) end end # Update the document referenced by the document number +id+ if +id+ is an # integer or all of the documents which have the term +id+ if +id+ is a # term.. # For batch update of set of documents, for performance reasons, see batch_update # # id:: The number of the document to update. Can also be a string # representing the value in the +id+ field. Also consider using # the :key attribute. # new_doc:: The document to replace the old document with def update(id, new_doc) @dir.synchronize do ensure_writer_open() delete(id) if id.is_a?(String) or id.is_a?(Symbol) @writer.commit else ensure_writer_open() end @writer << new_doc flush() if @auto_flush end end # Batch updates the documents in an index. You can pass either a Hash or # an Array. # # === Array (recommended) # # If you pass an Array then each value needs to be a Document or a Hash # and each of those documents must have an +:id_field+ which will be used # to delete the old document that this document is replacing. # # === Hash # # If you pass a Hash then the keys of the Hash will be considered the # +id+'s and the values will be the new documents to replace the old ones # with.If the +id+ is an Integer then it is considered a Ferret document # number and the corresponding document will be deleted. If the +id+ is a # String or a Symbol then the +id+ will be considered a term and the # documents that contain that term in the +:id_field+ will be deleted. # # Note: No error will be raised if the document does not currently # exist. A new document will simply be created. # # == Examples # # # will replace the documents with the +id+'s id:133 and id:254 # @index.batch_update({ # '133' => {:id => '133', :content => 'yada yada yada'}, # '253' => {:id => '253', :content => 'bla bla bal'} # }) # # # will replace the documents with the Ferret Document numbers 2 and 92 # @index.batch_update({ # 2 => {:id => '133', :content => 'yada yada yada'}, # 92 => {:id => '253', :content => 'bla bla bal'} # }) # # # will replace the documents with the +id+'s id:133 and id:254 # # this is recommended as it guarantees no duplicate keys # @index.batch_update([ # {:id => '133', :content => 'yada yada yada'}, # {:id => '253', :content => 'bla bla bal'} # ]) # # docs:: A Hash of id/document pairs. The set of documents to be updated def batch_update(docs) @dir.synchronize do ids = values = nil case docs when Array ids = docs.collect{|doc| doc[@id_field].to_s} if ids.include?(nil) raise ArgumentError, "all documents must have an #{@id_field} " "field when doing a batch update" end when Hash ids = docs.keys docs = docs.values else raise ArgumentError, "must pass Hash or Array, not #{docs.class}" end batch_delete(ids) ensure_writer_open() docs.each {|new_doc| @writer << new_doc } flush() end end # Update all the documents returned by the query. # # query:: The query to find documents you wish to update. Can either be # a string (in which case it is parsed by the standard query # parser) or an actual query object. # new_val:: The values we are updating. This can be a string in which case # the default field is updated, or it can be a hash, in which # case, all fields in the hash are merged into the old hash. # That is, the old fields are replaced by values in the new hash # if they exist. # # === Example # # index << {:id => "26", :title => "Babylon", :artist => "David Grey"} # index << {:id => "29", :title => "My Oh My", :artist => "David Grey"} # # # correct # index.query_update('artist:"David Grey"', {:artist => "David Gray"}) # # index["26"] # #=> {:id => "26", :title => "Babylon", :artist => "David Gray"} # index["28"] # #=> {:id => "28", :title => "My Oh My", :artist => "David Gray"} # def query_update(query, new_val) @dir.synchronize do ensure_writer_open() ensure_searcher_open() docs_to_add = [] query = do_process_query(query) @searcher.search_each(query, :limit => :all) do |id, score| document = @searcher[id].load if new_val.is_a?(Hash) document.merge!(new_val) else new_val.is_a?(String) or new_val.is_a?(Symbol) document[@default_input_field] = new_val.to_s end docs_to_add << document @reader.delete(id) end ensure_writer_open() docs_to_add.each {|doc| @writer << doc } flush() if @auto_flush end end # Returns true if any documents have been deleted since the index was last # flushed. def has_deletions?() @dir.synchronize do ensure_reader_open() return @reader.has_deletions? end end # Flushes all writes to the index. This will not optimize the index but it # will make sure that all writes are written to it. # # NOTE: this is not necessary if you are only using this class. All writes # will automatically flush when you perform an operation that reads the # index. def flush() @dir.synchronize do if @reader if @searcher @searcher.close @searcher = nil end @reader.commit elsif @writer @writer.close @writer = nil end end end alias :commit :flush # optimizes the index. This should only be called when the index will no # longer be updated very often, but will be read a lot. def optimize() @dir.synchronize do ensure_writer_open() @writer.optimize() @writer.close() @writer = nil end end # returns the number of documents in the index def size() @dir.synchronize do ensure_reader_open() return @reader.num_docs() end end # Merges all segments from an index or an array of indexes into this # index. You can pass a single Index::Index, Index::Reader, # Store::Directory or an array of any single one of these. # # This may be used to parallelize batch indexing. A large document # collection can be broken into sub-collections. Each sub-collection can # be indexed in parallel, on a different thread, process or machine and # perhaps all in memory. The complete index can then be created by # merging sub-collection indexes with this method. # # After this completes, the index is optimized. def add_indexes(indexes) @dir.synchronize do ensure_writer_open() indexes = [indexes].flatten # make sure we have an array return if indexes.size == 0 # nothing to do if indexes[0].is_a?(Index) indexes.delete(self) # don't merge with self indexes = indexes.map {|index| index.reader } elsif indexes[0].is_a?(Ferret::Store::Directory) indexes.delete(@dir) # don't merge with self indexes = indexes.map {|dir| IndexReader.new(dir) } elsif indexes[0].is_a?(IndexReader) indexes.delete(@reader) # don't merge with self else raise ArgumentError, "Unknown index type when trying to merge indexes" end ensure_writer_open @writer.add_readers(indexes) end end # This is a simple utility method for saving an in memory or RAM index to # the file system. The same thing can be achieved by using the # Index::Index#add_indexes method and you will have more options when # creating the new index, however this is a simple way to turn a RAM index # into a file system index. # # directory:: This can either be a Store::Directory object or a String # representing the path to the directory where you would # like to store the index. # # create:: True if you'd like to create the directory if it doesn't # exist or copy over an existing directory. False if you'd # like to merge with the existing directory. This defaults to # false. def persist(directory, create = true) synchronize do close_all() old_dir = @dir if directory.is_a?(String) @dir = FSDirectory.new(directory, create) elsif directory.is_a?(Ferret::Store::Directory) @dir = directory end @dir.extend(MonitorMixin) unless @dir.kind_of? MonitorMixin @options[:dir] = @dir @options[:create_if_missing] = true add_indexes([old_dir]) end end def to_s buf = "" (0...(size)).each do |i| buf << self[i].to_s + "\n" if not deleted?(i) end buf end # Returns an Explanation that describes how +doc+ scored against # +query+. # # This is intended to be used in developing Similarity implementations, # and, for good performance, should not be displayed with every hit. # Computing an explanation is as expensive as executing the query over the # entire index. def explain(query, doc) @dir.synchronize do ensure_searcher_open() query = do_process_query(query) return @searcher.explain(query, doc) end end # Turn a query string into a Query object with the Index's QueryParser def process_query(query) @dir.synchronize do ensure_searcher_open() return do_process_query(query) end end # Returns the field_infos object so that you can add new fields to the # index. def field_infos @dir.synchronize do ensure_writer_open() return @writer.field_infos end end protected def ensure_writer_open() raise "tried to use a closed index" if not @open return if @writer if @reader @searcher.close if @searcher @reader.close @reader = nil @searcher = nil end @writer = IndexWriter.new(@options) end # returns the new reader if one is opened def ensure_reader_open(get_latest = true) raise "tried to use a closed index" if not @open if @reader if get_latest latest = false begin latest = @reader.latest? rescue Lock::LockError => le sleep(@options[:lock_retry_time]) # sleep for 2 seconds and try again latest = @reader.latest? end if not latest @searcher.close if @searcher @reader.close return @reader = IndexReader.new(@dir) end end else if @writer @writer.close @writer = nil end return @reader = IndexReader.new(@dir) end return false end def ensure_searcher_open() raise "tried to use a closed index" if not @open if ensure_reader_open() or not @searcher @searcher = Searcher.new(@reader) end end private def do_process_query(query) if query.is_a?(String) if @qp.nil? @qp = Ferret::QueryParser.new(@options) end # we need to set this every time, in case a new field has been added @qp.fields = @reader.fields unless options[:all_fields] || options[:fields] @qp.tokenized_fields = @reader.tokenized_fields unless options[:tokenized_fields] query = @qp.parse(query) end return query end def do_search(query, options) ensure_searcher_open() query = do_process_query(query) return @searcher.search(query, options) end def close_all() @dir.synchronize do @searcher.close if @searcher @reader.close if @reader @writer.close if @writer @reader = nil @searcher = nil @writer = nil end end # If +docs+ is a Hash or an Array then a batch delete will be performed. # If +docs+ is an Array then it will be considered an array of +id+'s. If # it is a Hash, then its keys will be used instead as the Array of # document +id+'s. If the +id+ is an Integers then it is considered a # Ferret document number and the corresponding document will be deleted. # If the +id+ is a String or a Symbol then the +id+ will be considered a # term and the documents that contain that term in the +:id_field+ will # be deleted. # # docs:: An Array of docs to be deleted, or a Hash (in which case the keys # are used) def batch_delete(docs) docs = docs.keys if docs.is_a?(Hash) raise ArgumentError, "must pass Array or Hash" unless docs.is_a? Array ids = [] terms = [] docs.each do |doc| case doc when String then terms << doc when Symbol then terms << doc.to_s when Integer then ids << doc else raise ArgumentError, "Cannot delete for arg of type #{id.class}" end end if ids.size > 0 ensure_reader_open ids.each {|id| @reader.delete(id)} end if terms.size > 0 ensure_writer_open() @writer.delete(@id_field, terms) end return self end end end module Ferret I = Index::Index end ferret-0.11.8.6/lib/ferret/number_tools.rb0000644000004100000410000001260412476264460020374 0ustar www-datawww-datarequire 'date' require 'time' class Float # Return true if the float is within +precision+ of the other value +o+. This # is used to accommodate for floating point errors. # # o:: value to compare with # precision:: the precision to use in the comparison. # return:: true if the match is within +precision+ def =~(o, precision = 0.0000000001) return (1 - self/o).abs < precision end end # Provides support for converting integers to Strings, and back again. The # strings are structured so that lexicographic sorting order is preserved. # # That is, if integer1 is less than integer2 for any two integers integer1 and # integer2, then integer1.to_s_lex is lexicographically less than # integer2.to_s_lex. (Similarly for "greater than" and "equals".) # # This class handles numbers between - 10 ** 10,000 and 10 ** 10,000 # which should cover all practical numbers. If you need bigger numbers, # increase Integer::LEN_STR_SIZE. class Integer # LEN_SIZE of 4 should handle most numbers that can practically be held in # memory. LEN_STR_SIZE = 4 NEG_LEN_MASK = 10 ** LEN_STR_SIZE LEN_STR_TEMPLATE = "%0#{LEN_STR_SIZE}d" # Convert the number to a lexicographically sortable string. This string will # use printable characters only but will not be human readable. def to_s_lex if (self >= 0) num_str = self.to_s len_str = LEN_STR_TEMPLATE % num_str.size return len_str + num_str else num = self * -1 num_str = num.to_s num_len = num_str.size len_str = LEN_STR_TEMPLATE % (NEG_LEN_MASK - num_len) num = (10 ** num_str.size) - num return "-#{len_str}%0#{num_len}d" % num end end # Convert the number to a lexicographically sortable string by padding with # 0s. You should make sure that you set the width to a number large enough to # accommodate all possible values. Also note that this method will not work # with negative numbers. That is negative numbers will sort in the opposite # direction as positive numbers. If you have very large numbers or a mix of # positive and negative numbers you should use the Integer#to_s_lex method # # width:: number of characters in the string returned. Default is 10. So # 123.to_s_pad(5) => 00123 and -123.to_s_pad(5) => -0123 # return:: padding string representation of the number. def to_s_pad(width = 10) "%#{width}d" % self end end class Date # Convert the Date to a lexicographically sortable string with the required # precision. The format used is %Y%m%d # # precision:: the precision required in the string version of the date. The # options are :year, :month and :day # return:: a lexicographically sortable string representing the date def to_s_lex(precision = :day) self.strftime(Time::LEX_FORMAT[precision]) end end class DateTime # Convert the DateTime to a lexicographically sortable string with the # required precision. The format used is %Y%m%d %H:%M:%S. # # precision:: the precision required in the string version of the date. The # options are :year, :month, :day, :hour, :minute and :second # return:: a lexicographically sortable string representing the date def to_s_lex(precision = :day) self.strftime(Time::LEX_FORMAT[precision]) end end class Time LEX_FORMAT = { :year => "%Y", :month => "%Y-%m", :day => "%Y-%m-%d", :hour => "%Y-%m-%d %H", :minute => "%Y-%m-%d %H:%M", :second => "%Y-%m-%d %H:%M:%S", :millisecond => "%Y-%m-%d %H:%M:%S" } # Convert the Time to a lexicographically sortable string with the required # precision. The format used is %Y%m%d %H:%M:%S. # # precision:: the precision required in the string version of the time. The # options are :year, :month, :day, :hour, :minute and :second # return:: a lexicographically sortable string representing the date def to_s_lex(precision = :day) self.strftime(LEX_FORMAT[precision]) end end class String # Convert a string to an integer. This method will only work on strings that # were previously created with Integer#to_s_lex, otherwise the result will be # unpredictable. def to_i_lex if (self[0] == ?-) return self[(Integer::LEN_STR_SIZE + 1)..-1].to_i - 10 ** (self.size - Integer::LEN_STR_SIZE - 1) else return self[Integer::LEN_STR_SIZE..-1].to_i end end # Convert a string to a Time. This method will only work on strings that # match the format %Y%m%d %H%M%S, otherwise the result will be unpredictable. def to_time_lex vals = [] self.gsub(/(?:^|[- :])(\d+)/) {vals << $1.to_i; $&} Time.mktime(*vals) end # Convert a string to a Date. This method will only work on strings that # match the format %Y%m%d %H%M%S, otherwise the result will be unpredictable. def to_date_lex return Date.strptime(self + "-02-01", "%Y-%m-%d") end # Convert a string to a DateTime. This method will only work on strings that # match the format %Y%m%d %H%M%S, otherwise the result will be unpredictable. def to_date_time_lex return DateTime.strptime(self + "-01-01", "%Y-%m-%d %H:%M:%S") end private def get_lex_format(len) case len when 0.. 3 then "" when 4.. 5 then "%Y" when 6.. 7 then "%Y%m" when 8.. 9 then "%Y%m%d" when 10..11 then "%Y%m%d%H" when 12..13 then "%Y%m%d%H%M" else "%Y%m%d%H%M%S" end end end ferret-0.11.8.6/lib/ferret/browser.rb0000644000004100000410000001555712476264460017361 0ustar www-datawww-datarequire 'erb' module Ferret::Browser class Delegator def initialize(reader, path) @reader, @path = reader, path end def run(env) controller, action, args = :home, :index, nil query_string = env['QUERY_STRING']||'' params = parse_query_string(query_string) req_path = env['PATH_INFO'].gsub(/\/+/, '/') case req_path when '/' # nothing to do when /^\/?([-a-zA-Z]+)\/?$/ controller = $1 when /^\/?([-a-zA-Z]+)\/([-a-zA-Z]+)\/?(.*)?$/ controller = $1 action = $2 args = $3 else controller = :error args = req_path end controller_vars = { :params => params, :req_path => req_path, :query_string => query_string, } delegate(controller, action, args, controller_vars) end private def delegate(controller, action, args, controller_vars) begin controller = to_const(controller, 'Controller'). new(@reader, @path, controller_vars) controller.send(action, args) rescue Exception => e puts e.to_s controller_vars[:params][:error] = e ErrorController.new(@reader, @path, controller_vars).index end end def to_const(str, suffix='') Ferret::Browser.const_get(str.to_s.split('-'). map {|w| w.capitalize}.join('') + suffix) end # from _why's camping def unescape_uri(s) s.tr('+', ' ').gsub(/%([\da-f]{2})/in){[$1].pack('H*')} end def parse_query_string(query_string, delim = '&;') m = proc {|_,o,n| o.update(n, &m) rescue ([*o] << n)} (query_string||'').split(/[#{delim}] */n). inject({}) { |hash, param| key, val = unescape_uri(param).split('=',2) hash.update(key.split(/[\]\[]+/).reverse. inject(val) { |x,i| Hash[i,x] }, &m) } end end module ViewHelper # truncates the string at the first space after +len+ characters def truncate(str, len = 80) if str and str.length > len and (add = str[len..-1].index(' ')) str = str[0, len + add] + '…' end str end def tick_or_cross(t) "\"#{t" end end class Controller include ViewHelper APP_DIR = File.expand_path(File.join(File.dirname(__FILE__), "browser/")) STATIC_DIR = File.expand_path(File.join(APP_DIR, "s/")) def initialize(reader, path, vars) @reader = reader @path = path vars.each_pair {|key, val| instance_eval("@#{key} = val")} @controller_path = pathify(self.class.to_s.gsub(/.*:/, '')) end def method_missing(meth_id, *args) render(:action => meth_id) end protected def load_page(page) File.read(File.join(APP_DIR, page)) end def render(options = {}) options = { :controller => @controller_path, :action => :index, :status => 200, :content_type => 'text/html', :env => nil, :layout => 'views/layout.rhtml', }.update(options) path = "views/#{options[:controller]}/#{options[:action]}.rhtml" content = ERB.new(load_page(path)).result(lambda{}) if options[:layout] content = ERB.new(load_page(options[:layout])).result(lambda{}) end return options[:status], options[:content_type], content end # takes an optional block to set optional attributes in the links def paginate(idx, max, url, &b) return '' if max == 0 url = url.gsub(%r{^/?(.*?)/?$}, '\1') b ||= lambda{} link = lambda {|*args| i, title, text = args "#{text||i}" } res = '' end private def pathify(str) str.gsub(/Controller$/, '').gsub(/([a-z])([A-Z])/) {"#{$1}-#{$2}"}.downcase end end class ErrorController < Controller def index render(:status => 404) end end class HomeController < Controller end class DocumentController < Controller def list(page = 0) @page = (page||0).to_i @page_size = @params[:page_size]||10 @first = @page * @page_size @last = [@reader.max_doc, (@page + 1) * @page_size].min render(:action => :list) end alias :index :list def show(doc_id) doc_id = @params['doc_id']||doc_id||'0' if doc_id !~ /^\d+$/ raise ArgumentError.new("invalid document number '#{doc_id}'") end @doc_id = doc_id.to_i @doc = @reader[@doc_id].load unless @reader.deleted?(@doc_id) render(:action => :show) end private def choose_document(doc_id='') <<-EOF
EOF end def paginate_docs paginate(@doc_id, @reader.max_doc, '/document/show/') {|i| 'deleted' if @reader.deleted?(i) } end end class TermController < Controller def show(field) if field and field.length > 0 @field = field.to_sym @terms = @reader.terms(@field).to_json(:fast) end render(:action => :index) end def termdocs(args) args = args.split('/') @field = args.shift.intern @term = args.join('/') render(:action => :termdocs, :content_type => 'text/plain', :layout => false) end end class TermVectorController < Controller end class HelpController < Controller end end ferret-0.11.8.6/lib/ferret/field_infos.rb0000644000004100000410000000200212476264460020134 0ustar www-datawww-datarequire 'yaml' module Ferret::Index class FieldInfos # Load FieldInfos from a YAML file. The YAML file should look something like # this: # default: # store: :yes # index: :yes # term_vector: :no # # fields: # id: # index: :untokenized # term_vector: :no # # title: # boost: 20.0 # term_vector: :no # # content: # term_vector: :with_positions_offsets # def self.load(yaml_str) info = YAML.load(yaml_str) convert_strings_to_symbols(info) fis = FieldInfos.new(info[:default]) fields = info[:fields] fields.keys.each {|key| fis.add_field(key, fields[key])} if fields fis end private def self.convert_strings_to_symbols(hash) hash.keys.each do |key| convert_strings_to_symbols(hash[key]) if hash[key].is_a?(Hash) if key.is_a?(String) hash[key.intern] = hash[key] hash.delete(key) end end end end end ferret-0.11.8.6/lib/ferret/document.rb0000644000004100000410000001034712476264460017504 0ustar www-datawww-datamodule Ferret # Instead of using documents to add data to an index you can use Hashes and # Arrays. The only real benefits of using a Document over a Hash are pretty # printing and the boost attribute. You can add the boost attribute to # Hashes and arrays using the BoostMixin. For example; # # class Hash # include BoostMixin # end # # class Array # include BoostMixin # end # # class String # include BoostMixin # end module BoostMixin attr_accessor :boost end # Documents are the unit of indexing and search. # # A Document is a set of fields. Each field has a name and an array of # textual values. If you are coming from a Lucene background you should note # that Fields don't have any properties except for the boost property. You # should use the Ferret::Index::FieldInfos class to set field properties # across the whole index instead. # # === Boost # # The boost attribute makes a Document more important in the index. That is, # you can increase the score of a match for queries that match a particular # document, making it more likely to appear at the top of search results. # You may, for example, want to boost products that have a higher user # rating so that they are more likely to appear in search results. # # Note: that fields which are _not_ stored (see Ferret::Index::FieldInfos) # are _not_ available in documents retrieved from the index, e.g. # Ferret::Search::Searcher#doc or Ferret::Index::IndexReader#doc. # # Note: that modifying a Document retrieved from the index will not modify # the document contained within the index. You need to delete the old # version of the document and add the new version of the document. class Document < Hash include BoostMixin # Create a new Document object with a boost. The boost defaults to 1.0. def initialize(boost = 1.0) @boost = boost end # Return true if the documents are equal, ie they have the same fields def eql?(o) return (o.is_a? Document and (o.boost == @boost) and (self.keys == o.keys) and (self.values == o.values)) end alias :== :eql? # Create a string representation of the document def to_s buf = ["Document {"] self.keys.sort_by {|key| key.to_s}.each do |key| val = self[key] val_str = if val.instance_of? Array then %{["#{val.join('", "')}"]} elsif val.is_a? Field then val.to_s else %{"#{val.to_s}"} end buf << " :#{key} => #{val_str}" end buf << ["}#{@boost == 1.0 ? "" : "^" + @boost.to_s}"] return buf.join("\n") end end # A Field is a section of a Document. A Field is basically an array with a # boost attribute. It also provides pretty printing of the field with the # #to_s method. # # === Boost # # The boost attribute makes a field more important in the index. That is, # you can increase the score of a match for queries that match terms in a # boosted field. You may, for example, want to boost a title field so that # matches that match in the :title field score more highly than matches that # match in the :contents field. # # Note: If you'd like to use boosted fields without having to use # the Field class you can just include the BoostMixin in the Array class. # See BoostMixin. class Field < Array include BoostMixin # Create a new Field object. You can pass data to the field as either a # string; # # f = Field.new("This is the fields data") # # or as an array of strings; # # f = Field.new(["this", "is", "an", "array", "of", "field", "data"]) # # Of course Fields can also be boosted; # # f = Field.new("field data", 1000.0) def initialize(data = [], boost = 1.0) @boost = boost if data.is_a? Array data.each {|v| self << v} else self << data.to_s end end def eql?(o) return (o.is_a? Field and (o.boost == @boost) and super(o)) end alias :== :eql? def +(o) return Field.new(super(o), self.boost) end def to_s buf = %{["#{self.join('", "')}"]} buf << "^#@boost" if @boost != 1.0 return buf end end end ferret-0.11.8.6/lib/ferret/version.rb0000644000004100000410000000005112476264460017342 0ustar www-datawww-datamodule Ferret VERSION = '0.11.8.6' end ferret-0.11.8.6/lib/ferret/field_symbol.rb0000644000004100000410000000514612476264460020337 0ustar www-datawww-datamodule Ferret FIELD_TYPES = %w(integer float string byte).map{|t| t.to_sym} if defined?(BasicObject) # Ruby 1.9.x class BlankSlate < BasicObject end else # Ruby 1.8.x # BlankSlate is a class with no instance methods except for __send__ and # __id__. It is useful for creating proxy classes. It is currently used by # the FieldSymbol class which is a proxy to the Symbol class class BlankSlate instance_methods.each { |m| undef_method m unless m =~ /^__|object_id/ } end end # The FieldSymbolMethods module contains the methods that are added to both # the Symbol class and the FieldSymbol class. These methods allow you to set # the type easily set the type of a field by calling a method on a symbol. # # Right now this is only useful for Sorting and grouping, but some day Ferret # may have typed fields, in which case these this methods will come in handy. # # The available types are specified in Ferret::FIELD_TYPES. # # == Examples # # index.search(query, :sort => :title.string.desc) # # index.search(query, :sort => [:price.float, :count.integer.desc]) # # index.search(query, :group_by => :catalogue.string) # # == Note # # If you set the field type multiple times, the last type specified will be # the type used. For example; # # puts :title.integer.float.byte.string.type.inspect # => :string # # Calling #desc twice will set desc? to false # # puts :title.desc? # => false # puts :title.desc.desc? # => true # puts :title.desc.desc.desc? # => false module FieldSymbolMethods FIELD_TYPES.each do |method| define_method(method) do fsym = FieldSymbol.new(self, respond_to?(:desc?) ? desc? : false) fsym.type = method fsym end end # Set a field to be a descending field. This only makes sense in sort # specifications. def desc fsym = FieldSymbol.new(self, respond_to?(:desc?) ? !desc? : true) fsym.type = type if respond_to? :type fsym end # Return whether or not this field should be a descending field def desc? @desc == true end # Return the type of this field def type @type || nil end end # See FieldSymbolMethods class FieldSymbol < BlankSlate include FieldSymbolMethods def initialize(symbol, desc = false) @symbol = symbol @desc = desc end def method_missing(method, *args) @symbol.__send__(method, *args) end attr_writer :type, :desc end end # See FieldSymbolMethods class Symbol include Ferret::FieldSymbolMethods end ferret-0.11.8.6/lib/ferret/browser/0000755000004100000410000000000012476264460017017 5ustar www-datawww-dataferret-0.11.8.6/lib/ferret/browser/views/0000755000004100000410000000000012476264460020154 5ustar www-datawww-dataferret-0.11.8.6/lib/ferret/browser/views/help/0000755000004100000410000000000012476264460021104 5ustar www-datawww-dataferret-0.11.8.6/lib/ferret/browser/views/help/index.rhtml0000644000004100000410000000062312476264460023264 0ustar www-datawww-data

Ferret Browser Help

At the moment there is no help for the Ferret Browser. Everything should be fairly self explanatory. If not, you can post a question on the mailing list or you can edit the FerretBrowser wiki page.

ferret-0.11.8.6/lib/ferret/browser/views/home/0000755000004100000410000000000012476264460021104 5ustar www-datawww-dataferret-0.11.8.6/lib/ferret/browser/views/home/index.rhtml0000644000004100000410000000156412476264460023271 0ustar www-datawww-data

Index Info

Number of Documents
<%= @reader.num_docs %>
Maximum Document
<%= @reader.max_doc %>

Field Infos

<% field_props = [:stored?, :compressed?, :indexed?, :tokenized?, :omit_norms?, :store_term_vector?, :store_positions?, :store_offsets?, :has_norms?] %> <% field_props.each do |prop| %> <% end %> <% even=false; @reader.field_infos.each do |fi| %> <% field_props.each do |prop| %> <% end %> <% end %>
Field Name Boost<%= prop %>
<%= fi.name %> <%= fi.boost %><%= tick_or_cross(fi.send(prop)) %>
ferret-0.11.8.6/lib/ferret/browser/views/document/0000755000004100000410000000000012476264460021772 5ustar www-datawww-dataferret-0.11.8.6/lib/ferret/browser/views/document/show.rhtml0000644000004100000410000000100512476264460024016 0ustar www-datawww-data

View Document

<%= choose_document(@doc_id) %>
<%= paginate_docs %> <% total_length = 0 %>
<% if @doc %>

Fields

<% @doc.each_pair do |field, value| %> <% total_length += value.length %>
<%= field %>
<% if value.length > 50 %>
<%= value %>
<% else %> <%= value %> <% end %>
<% end %>

<% else %>

Document has been deleted

<% end %>
<%= (total_length > 1000) ? paginate_docs : '' %> ferret-0.11.8.6/lib/ferret/browser/views/document/list.rhtml0000644000004100000410000000263312476264460024021 0ustar www-datawww-data

List Documents

Show fields
<% fields = []; field_settings = @params["field"]||{}; @reader.field_infos.each do |fi| next unless fi.stored? %> <% end %>

<%= paginate(@page, @reader.max_doc/@page_size, '/document/list/') %>
<% fields.each do |field| %> <% end %> <% even=false; (@first...@last).each do |i| %> <% if @reader.deleted?(i) %> <% else %> <% doc = @reader[i]; fields.each do |field| %> <% end %> <% end %> <% end %>
Internal ID<%= field %>
<%=i%> is deleted
<%= i %><%= truncate(doc[field]) %>

<%= choose_document(@doc_id) %> ferret-0.11.8.6/lib/ferret/browser/views/term/0000755000004100000410000000000012476264460021123 5ustar www-datawww-dataferret-0.11.8.6/lib/ferret/browser/views/term/index.rhtml0000644000004100000410000001426712476264460023314 0ustar www-datawww-data

Terms


<% if @terms %>

Field: <%= @field %>




Documents




<% end %>
ferret-0.11.8.6/lib/ferret/browser/views/term/termdocs.rhtml0000644000004100000410000000010012476264460024002 0ustar www-datawww-data<%= @reader.term_positions_for(@field, @term).to_json(:fast) %> ferret-0.11.8.6/lib/ferret/browser/views/term-vector/0000755000004100000410000000000012476264460022423 5ustar www-datawww-dataferret-0.11.8.6/lib/ferret/browser/views/term-vector/index.rhtml0000644000004100000410000000006712476264460024605 0ustar www-datawww-data

Term Vectors

Nothing to see here yet

ferret-0.11.8.6/lib/ferret/browser/views/error/0000755000004100000410000000000012476264460021305 5ustar www-datawww-dataferret-0.11.8.6/lib/ferret/browser/views/error/index.rhtml0000644000004100000410000000023512476264460023464 0ustar www-datawww-data

<%= options[:status] %> Error

<% if @params[:error] %>
<%= @params[:error] %>
<%= @params[:error].backtrace.join('
') %>
<% end %> ferret-0.11.8.6/lib/ferret/browser/views/layout.rhtml0000644000004100000410000000163612476264460022547 0ustar www-datawww-data Ferret-Browser:: <%= @path %>
<%= content %>
ferret-0.11.8.6/lib/ferret/browser/webrick.rb0000644000004100000410000000076112476264460020776 0ustar www-datawww-datamodule WEBrick class FerretBrowserHandler < WEBrick::HTTPServlet::AbstractServlet # Creates a FerretBrowserHandler, which answers for the application # within +klass+. def initialize(server, reader, path) super(server) @delegator = Ferret::Browser::Delegator.new(reader, path) end # Handler for WEBrick requests (also aliased as do_POST). def do_GET(req, res) res.status, res.content_type, res.body = @delegator.run(req.meta_vars) end end end ferret-0.11.8.6/lib/ferret/browser/s/0000755000004100000410000000000012476264460017261 5ustar www-datawww-dataferret-0.11.8.6/lib/ferret/browser/s/global.js0000644000004100000410000001245712476264460021070 0ustar www-datawww-datafunction AutoSuggestControl(oTextbox, oProvider) { this.cur = -1; this.layer = null; this.provider = oProvider; this.textbox = oTextbox; this.init(); } AutoSuggestControl.prototype.init = function() { var oThis = this; this.textbox.setAttribute("autocomplete", "off"); this.textbox.onkeyup = function(oEvent) { if (!oEvent) { oEvent = window.event; } oThis.handleKeyUp(oEvent); }; this.textbox.onkeydown = function (oEvent) { if (!oEvent) { oEvent = window.event; } oThis.handleKeyDown(oEvent); }; this.textbox.onblur = function () { oThis.hideSuggestions(); }; this.createDropDown(); }; AutoSuggestControl.prototype.selectRange = function(iStart, iLength) { if (this.textbox.createTextRange) { var oRange = this.textbox.createTextRange(); oRange.moveStart("character", iStart); oRange.moveEnd("character", iLength - this.textbox.value.length); oRange.select(); } else if (this.textbox.setSelectionRange) { this.textbox.setSelectionRange(iStart, iLength); } this.textbox.focus(); }; AutoSuggestControl.prototype.typeAhead = function(sSuggestion) { if (this.textbox.createTextRange || this.textbox.setSelectionRange) { var iLen = this.textbox.value.length; this.textbox.value = sSuggestion; this.selectRange(iLen, sSuggestion.length); } }; AutoSuggestControl.prototype.autosuggest = function(aSuggestions, bTypeAhead) { if (aSuggestions.length > 0) { if (bTypeAhead) { this.typeAhead(aSuggestions[0]); } this.showSuggestions(aSuggestions); } else { this.hideSuggestions(); } }; AutoSuggestControl.prototype.handleKeyUp = function(oEvent) { var iKeyCode = oEvent.keyCode; if (iKeyCode == 8 || iKeyCode == 46) { this.provider.requestSuggestions(this, false); } else if (iKeyCode < 32 || (iKeyCode >= 33 && iKeyCode <= 46) || (iKeyCode >= 112 && iKeyCode <= 123)) { //ignore } else { this.provider.requestSuggestions(this, true); } }; AutoSuggestControl.prototype.handleKeyDown = function (oEvent) { switch(oEvent.keyCode) { case 38: //up arrow this.previousSuggestion(); break; case 40: //down arrow this.nextSuggestion(); break; case 13: //enter this.hideSuggestions(); break; } }; AutoSuggestControl.prototype.hideSuggestions = function() { this.layer.style.visibility = "hidden"; }; AutoSuggestControl.prototype.highlightSuggestion = function(oSuggestionNode) { for (var i=0; i < this.layer.childNodes.length; i++) { var oNode = this.layer.childNodes[i]; if (oNode == oSuggestionNode) { oNode.className = "current" } else if (oNode.className == "current") { oNode.className = ""; } } }; AutoSuggestControl.prototype.createDropDown = function() { this.layer = document.createElement("div"); this.layer.className = "suggestions"; this.layer.style.visibility = "hidden"; this.layer.style.width = this.textbox.offsetWidth; document.body.appendChild(this.layer); var oThis = this; this.layer.onmousedown = this.layer.onmouseup = this.layer.onmouseover = function(oEvent) { oEvent = oEvent || window.event; oTarget = oEvent.target || oEvent.srcElement; if (oEvent.type == "mousedown") { oThis.textbox.value = oTarget.firstChild.nodeValue; oThis.hideSuggestions(); } else if (oEvent.type == "mouseover") { oThis.highlightSuggestion(oTarget); } else { oThis.textbox.focus(); } }; }; AutoSuggestControl.prototype.getLeft = function() { var oNode = this.textbox; var iLeft = 0; while (oNode.tagName != "BODY") { iLeft += oNode.offsetLeft; oNode = oNode.offsetParent; } return iLeft; }; AutoSuggestControl.prototype.getTop = function() { var oNode = this.textbox; var iTop = 0; while (oNode.tagName != "BODY") { iTop += oNode.offsetTop; oNode = oNode.offsetParent; } return iTop; }; AutoSuggestControl.prototype.showSuggestions = function(aSuggestions) { var oDiv = null; this.layer.innerHTML = ""; for (var i = 0; i < aSuggestions.length; i++) { oDiv = document.createElement("div"); oDiv.appendChild(document.createTextNode(aSuggestions[i])); this.layer.appendChild(oDiv); } this.layer.style.left = this.getLeft() + "px"; this.layer.style.top = (this.getTop()+this.textbox.offsetHeight) + "px"; this.layer.style.width = this.textbox.offsetWidth + "px"; this.layer.style.visibility = "visible"; }; AutoSuggestControl.prototype.nextSuggestion = function() { var cSuggestionNodes = this.layer.childNodes; if (cSuggestionNodes.length > 0 && this.cur < cSuggestionNodes.length-1) { var oNode = cSuggestionNodes[++this.cur]; this.highlightSuggestion(oNode); this.textbox.value = oNode.firstChild.nodeValue; } }; AutoSuggestControl.prototype.previousSuggestion = function() { var cSuggestionNodes = this.layer.childNodes; if (cSuggestionNodes.length > 0 && this.cur > 0) { var oNode = cSuggestionNodes[--this.cur]; this.highlightSuggestion(oNode); this.textbox.value = oNode.firstChild.nodeValue; } }; function bsearch(aArray, item, less_than) { var left = -1, right = aArray.length, mid; while (right > left + 1) { mid = (left + right) >>> 1; if (less_than(aArray[mid], item)) { left = mid; } else { right = mid; } } return right; } ferret-0.11.8.6/lib/ferret/browser/s/style.css0000644000004100000410000000601312476264460021133 0ustar www-datawww-databody { padding: 10px 30px; } ul#top-menu { margin: 0; padding: 0; padding-left: 20px; border-bottom: 1px solid #000099; } ul#top-menu li { display: inline; padding: 0px; } ul#top-menu li a, ul#top-menu li a:visited { background-color: #cce6ff; padding: 0px 5px; border: 1px solid #000099; text-decoration: none; color: #000099; font-weight: bold; } body#home ul#top-menu li.home a, body#document ul#top-menu li.document a, body#term ul#top-menu li.term a, body#term-vector ul#top-menu li.term-vector a, body#help ul#top-menu li.help a { background-color: #f0f8ff; border-bottom: 1px solid #f0f8ff; } div#content { padding: 10px; margin: 0; background-color: #f0f8ff; } div.display {background-color:#fff;padding:0 5px;} dl { width: 51.1em; margin: 0; padding: 0; } dl dt { width: 15em; float: left; margin: 0; padding: .5em; border-top: 1px solid #999; font-weight: bold; color: #cc9900; } /* commented backslash hack for mac-ie5 \*/ dt { clear: both; } /* end hack */ dl dd { float: left; width: 34em; margin: 0 0 0 0; padding: .5em; border-top: 1px solid #999; } hr {clear:both;border:0;} h1, h2, h3, h4, h5, h6 {color:#cc0033;} th { font-weight: bold; background-color: #cc9900; color: white; padding: 3px; font-size: 12px; } th.small, td.small {font-size:11px;} th, td { border-top: 1px solid #994400; border-left: 1px solid #994400; } table { border-bottom: 1px solid #994400; border-right: 1px solid #994400; } table.left-headed th {width:80px;text-align:left;} table.left-headed td {background-color: white;text-align:right;width:120px;} td {padding:2px 6px;color:#555;} tr.even {background-color:#cce6ff;} tr.odd {background-color:white;} td.center {text-align:center} td.right {text-align:right} label.check {width:150px;float:left;} pre { border-left: solid 1px #cccccc; border-top: solid 1px #cccccc; border-bottom: solid 1px #cccccc; border-right: dashed 2px #cccccc; margin: 1em; padding: 0.7em; display: block; overflow: hidden; color: white; background-color: black; } pre:hover { overflow: visible; border-right: solid 1px #cccccc; width: 1000px; } div.nav {color:#007777;font-size:11px;} div.nav a, div.nav a:visited { background-color: white; color: #007777; padding: 2px 4px; border: 1px solid #bbd8ef; margin-right: 1px; text-decoration: none; font-size: 11px; } div.nav a.deleted { background-color: #fee; } div.nav a.deleted:hover { background-color: #077; color: #fee; } div.nav a.disabled, div.nav a.disabled:hover { cursor: default; color: #ccc; border: 1px solid #ccc; background-color: white; } div.nav a:hover { background-color: #007777; color: white; } /* Auto Suggest */ div.suggestions { -moz-box-sizing: border-box; box-sizing: border-box; border: 1px solid black; position: absolute; } div.suggestions div { cursor: default; padding: 0px 3px; background-color: white; opacity: 0.80; filter:alpha(opacity=80); } div.suggestions div.current { background-color: #3366cc; color: white; } ferret-0.11.8.6/metadata.yml0000644000004100000410000001655312476264460015614 0ustar www-datawww-data--- !ruby/object:Gem::Specification name: ferret version: !ruby/object:Gem::Version version: 0.11.8.6 platform: ruby authors: - David Balmain autorequire: bindir: bin cert_chain: [] date: 2015-02-19 00:00:00.000000000 Z dependencies: - !ruby/object:Gem::Dependency name: rake requirement: !ruby/object:Gem::Requirement requirements: - - ">=" - !ruby/object:Gem::Version version: '0' type: :development prerelease: false version_requirements: !ruby/object:Gem::Requirement requirements: - - ">=" - !ruby/object:Gem::Version version: '0' description: Ferret is a super fast, highly configurable search library. email: dbalmain@gmail.com executables: - ferret-browser extensions: - ext/extconf.rb extra_rdoc_files: - README - TODO - TUTORIAL - MIT-LICENSE - ext/r_analysis.c - ext/r_index.c - ext/r_qparser.c - ext/r_search.c - ext/r_store.c - ext/r_utils.c - ext/ferret.c files: - CHANGELOG - MIT-LICENSE - README - RELEASE_CHANGES - RELEASE_NOTES - Rakefile - TODO - TUTORIAL - bin/ferret-browser - ext/BZLIB_blocksort.c - ext/BZLIB_bzlib.c - ext/BZLIB_compress.c - ext/BZLIB_crctable.c - ext/BZLIB_decompress.c - ext/BZLIB_huffman.c - ext/BZLIB_randtable.c - ext/STEMMER_api.c - ext/STEMMER_libstemmer.c - ext/STEMMER_stem_ISO_8859_1_danish.c - ext/STEMMER_stem_ISO_8859_1_dutch.c - ext/STEMMER_stem_ISO_8859_1_english.c - ext/STEMMER_stem_ISO_8859_1_finnish.c - ext/STEMMER_stem_ISO_8859_1_french.c - ext/STEMMER_stem_ISO_8859_1_german.c - ext/STEMMER_stem_ISO_8859_1_hungarian.c - ext/STEMMER_stem_ISO_8859_1_italian.c - ext/STEMMER_stem_ISO_8859_1_norwegian.c - ext/STEMMER_stem_ISO_8859_1_porter.c - ext/STEMMER_stem_ISO_8859_1_portuguese.c - ext/STEMMER_stem_ISO_8859_1_spanish.c - ext/STEMMER_stem_ISO_8859_1_swedish.c - ext/STEMMER_stem_ISO_8859_2_romanian.c - ext/STEMMER_stem_KOI8_R_russian.c - ext/STEMMER_stem_UTF_8_danish.c - ext/STEMMER_stem_UTF_8_dutch.c - ext/STEMMER_stem_UTF_8_english.c - ext/STEMMER_stem_UTF_8_finnish.c - ext/STEMMER_stem_UTF_8_french.c - ext/STEMMER_stem_UTF_8_german.c - ext/STEMMER_stem_UTF_8_hungarian.c - ext/STEMMER_stem_UTF_8_italian.c - ext/STEMMER_stem_UTF_8_norwegian.c - ext/STEMMER_stem_UTF_8_porter.c - ext/STEMMER_stem_UTF_8_portuguese.c - ext/STEMMER_stem_UTF_8_romanian.c - ext/STEMMER_stem_UTF_8_russian.c - ext/STEMMER_stem_UTF_8_spanish.c - ext/STEMMER_stem_UTF_8_swedish.c - ext/STEMMER_stem_UTF_8_turkish.c - ext/STEMMER_utilities.c - ext/analysis.c - ext/analysis.h - ext/api.h - ext/array.c - ext/array.h - ext/bitvector.c - ext/bitvector.h - ext/bzlib.h - ext/bzlib_private.h - ext/compound_io.c - ext/config.h - ext/document.c - ext/document.h - ext/except.c - ext/except.h - ext/extconf.rb - ext/ferret.c - ext/ferret.h - ext/field_index.c - ext/field_index.h - ext/filter.c - ext/fs_store.c - ext/global.c - ext/global.h - ext/hash.c - ext/hash.h - ext/hashset.c - ext/hashset.h - ext/header.h - ext/helper.c - ext/helper.h - ext/index.c - ext/index.h - ext/internal.h - ext/lang.c - ext/lang.h - ext/libstemmer.h - ext/mempool.c - ext/mempool.h - ext/modules.h - ext/multimapper.c - ext/multimapper.h - ext/posh.c - ext/posh.h - ext/priorityqueue.c - ext/priorityqueue.h - ext/q_boolean.c - ext/q_const_score.c - ext/q_filtered_query.c - ext/q_fuzzy.c - ext/q_match_all.c - ext/q_multi_term.c - ext/q_parser.c - ext/q_phrase.c - ext/q_prefix.c - ext/q_range.c - ext/q_span.c - ext/q_term.c - ext/q_wildcard.c - ext/r_analysis.c - ext/r_index.c - ext/r_qparser.c - ext/r_search.c - ext/r_store.c - ext/r_utils.c - ext/ram_store.c - ext/scanner.c - ext/scanner.h - ext/scanner_mb.c - ext/scanner_utf8.c - ext/search.c - ext/search.h - ext/similarity.c - ext/similarity.h - ext/sort.c - ext/stem_ISO_8859_1_danish.h - ext/stem_ISO_8859_1_dutch.h - ext/stem_ISO_8859_1_english.h - ext/stem_ISO_8859_1_finnish.h - ext/stem_ISO_8859_1_french.h - ext/stem_ISO_8859_1_german.h - ext/stem_ISO_8859_1_hungarian.h - ext/stem_ISO_8859_1_italian.h - ext/stem_ISO_8859_1_norwegian.h - ext/stem_ISO_8859_1_porter.h - ext/stem_ISO_8859_1_portuguese.h - ext/stem_ISO_8859_1_spanish.h - ext/stem_ISO_8859_1_swedish.h - ext/stem_ISO_8859_2_romanian.h - ext/stem_KOI8_R_russian.h - ext/stem_UTF_8_danish.h - ext/stem_UTF_8_dutch.h - ext/stem_UTF_8_english.h - ext/stem_UTF_8_finnish.h - ext/stem_UTF_8_french.h - ext/stem_UTF_8_german.h - ext/stem_UTF_8_hungarian.h - ext/stem_UTF_8_italian.h - ext/stem_UTF_8_norwegian.h - ext/stem_UTF_8_porter.h - ext/stem_UTF_8_portuguese.h - ext/stem_UTF_8_romanian.h - ext/stem_UTF_8_russian.h - ext/stem_UTF_8_spanish.h - ext/stem_UTF_8_swedish.h - ext/stem_UTF_8_turkish.h - ext/stopwords.c - ext/store.c - ext/store.h - ext/symbol.c - ext/symbol.h - ext/term_vectors.c - ext/threading.h - ext/win32.h - lib/ferret.rb - lib/ferret/browser.rb - lib/ferret/browser/s/global.js - lib/ferret/browser/s/style.css - lib/ferret/browser/views/document/list.rhtml - lib/ferret/browser/views/document/show.rhtml - lib/ferret/browser/views/error/index.rhtml - lib/ferret/browser/views/help/index.rhtml - lib/ferret/browser/views/home/index.rhtml - lib/ferret/browser/views/layout.rhtml - lib/ferret/browser/views/term-vector/index.rhtml - lib/ferret/browser/views/term/index.rhtml - lib/ferret/browser/views/term/termdocs.rhtml - lib/ferret/browser/webrick.rb - lib/ferret/document.rb - lib/ferret/field_infos.rb - lib/ferret/field_symbol.rb - lib/ferret/index.rb - lib/ferret/number_tools.rb - lib/ferret/version.rb - setup.rb - test/long_running/largefile/tc_largefile.rb - test/test_all.rb - test/test_helper.rb - test/test_installed.rb - test/threading/number_to_spoken.rb - test/threading/thread_safety_index_test.rb - test/threading/thread_safety_read_write_test.rb - test/threading/thread_safety_test.rb - test/unit/analysis/tc_analyzer.rb - test/unit/analysis/tc_token_stream.rb - test/unit/index/tc_index.rb - test/unit/index/tc_index_reader.rb - test/unit/index/tc_index_writer.rb - test/unit/index/th_doc.rb - test/unit/query_parser/tc_query_parser.rb - test/unit/search/tc_filter.rb - test/unit/search/tc_fuzzy_query.rb - test/unit/search/tc_index_searcher.rb - test/unit/search/tc_multi_searcher.rb - test/unit/search/tc_multiple_search_requests.rb - test/unit/search/tc_search_and_sort.rb - test/unit/search/tc_sort.rb - test/unit/search/tc_sort_field.rb - test/unit/search/tc_spans.rb - test/unit/search/tm_searcher.rb - test/unit/store/tc_fs_store.rb - test/unit/store/tc_ram_store.rb - test/unit/store/tm_store.rb - test/unit/store/tm_store_lock.rb - test/unit/tc_document.rb - test/unit/tc_field_symbol.rb - test/unit/ts_analysis.rb - test/unit/ts_index.rb - test/unit/ts_largefile.rb - test/unit/ts_query_parser.rb - test/unit/ts_search.rb - test/unit/ts_store.rb - test/unit/ts_utils.rb - test/unit/utils/tc_bit_vector.rb - test/unit/utils/tc_number_tools.rb - test/unit/utils/tc_priority_queue.rb - test/utils/content_generator.rb homepage: http://github.com/jkraemer/ferret licenses: [] metadata: {} post_install_message: rdoc_options: - "--title" - Ferret -- Ruby Search Library - "--main" - README - "--line-numbers" - TUTORIAL - TODO require_paths: - lib required_ruby_version: !ruby/object:Gem::Requirement requirements: - - ">=" - !ruby/object:Gem::Version version: '0' required_rubygems_version: !ruby/object:Gem::Requirement requirements: - - ">=" - !ruby/object:Gem::Version version: '0' requirements: [] rubyforge_project: ferret rubygems_version: 2.4.5 signing_key: specification_version: 4 summary: Ruby indexing library. test_files: [] ferret-0.11.8.6/test/0000755000004100000410000000000012476264460014256 5ustar www-datawww-dataferret-0.11.8.6/test/long_running/0000755000004100000410000000000012476264460016755 5ustar www-datawww-dataferret-0.11.8.6/test/long_running/largefile/0000755000004100000410000000000012476264460020707 5ustar www-datawww-dataferret-0.11.8.6/test/long_running/largefile/tc_largefile.rb0000644000004100000410000000233012476264460023652 0ustar www-datawww-datarequire File.dirname(__FILE__) + "/../../test_helper" class SampleLargeTest < Test::Unit::TestCase include Ferret::Index include Ferret::Search include Ferret::Store include Ferret::Utils INDEX_DIR = File.dirname(__FILE__) + "/../../temp/largefile" RECORDS = 750 RECORD_SIZE = 10e5 def setup @index = Index.new(:path => INDEX_DIR, :create_if_missing => true, :key => :id) create_index! if @index.size == 0 or ENV["RELOAD_LARGE_INDEX"] end def test_file_index_created assert @index.size == RECORDS, "Index size should be #{RECORDS}, is #{@index.size}" end def test_keys_work @index << {:content => "foo", :id => RECORDS - 4} assert @index.size == RECORDS, "Index size should be #{RECORDS}, is #{@index.size}" end def test_read_file_after_two_gigs assert @index.reader[RECORDS - 5].load.is_a?(Hash) end def create_index! @@already_built_large_index ||= false return if @@already_built_large_index @@already_built_large_index = true a = "a" RECORDS.times { |i| seq = (a.succ! + " ") * RECORD_SIZE record = {:id => i, :content => seq} @index << record print "i" STDOUT.flush } puts "o" @index.optimize end end ferret-0.11.8.6/test/threading/0000755000004100000410000000000012476264460016223 5ustar www-datawww-dataferret-0.11.8.6/test/threading/number_to_spoken.rb0000644000004100000410000000716212476264460022127 0ustar www-datawww-data# Author: Matthew D Moss # # Writtern for ruby quiz #25 # class JapaneseTranslator # My knowledge of counting Japanese is limited, so this may not # be entirely correct; in particular, I don't know what rules # to follow after 'hyaku man' (1,000,000). # I also combine a digit with its group, such as 'gohyaku' rather # than 'go hyaku'; I just like reading it better that way. DIGITS = %w(zero ichi ni san yon go roku nana hachi kyu) GROUPS = %w(nothingtoseeheremovealong ju hyaku sen) MAN = 10000 def to_spoken(val) case val <=> 0 when -1 '- ' + to_spoken(-val) when 0 DIGITS[0] else group(val, 0) end end private def group(val, level) if val >= MAN group(val / MAN, 0) + 'man ' + group(val % MAN, 0) else case val when 0 '' when 1 level == 0 ? DIGITS[val] : GROUPS[level] when 2...10 DIGITS[val] + (GROUPS[level] if level > 0).to_s else group(val / 10, level+1) + ' ' + group(val % 10, level) end end end end class USEnglishTranslator # Formal, US English. Optional 'and'. Will not produce things # such as 'twelve hundred' but rather 'one thousand two hundred'. # The use of 'and' is incomplete; it is sometimes missed. DIGITS = %w(zero one two three four five six seven eight nine) TEENS = %w(ten eleven twelve thirteen fourteen fifteen sixteen seventeen eighteen nineteen) TENS = %w(hello world twenty thirty forty fifty sixty seventy eighty ninety) GROUPS = %w(thousand million billion trillion quadrillion quintillion sextillion septillion octillion nonillion decillion) K = 1000 def initialize(conjunction = true) @conjunction = conjunction end def to_spoken(val) case val <=> 0 when -1 'negative ' + to_spoken(-val) when 0 DIGITS[0] else group(val, 0).flatten.join(' ') end end private def group(val, level) x = group(val / K, level + 1) << GROUPS[level] if val >= K x.to_a << under_1000(val % K, level) end def under_1000(val, level) x = [DIGITS[val / 100]] << 'hundred' if val >= 100 x.to_a << under_100(val % 100, (level == 0 and not x.nil?)) end def under_100(val, junction) x = [('and' if @conjunction and junction)] # wyf? case val when 0 [] when 1...10 x << DIGITS[val] when 10...20 x << TEENS[val - 10] else d = val % 10 x << (TENS[val / 10] + ('-' + DIGITS[d] if d != 0).to_s) end end end class Integer def to_spoken(translator = USEnglishTranslator.new) translator.to_spoken(self).squeeze(' ').strip end end if $0 == __FILE__ SAMPLES = [ 0, 1, 2, 5, 10, 11, 14, 18, 20, 21, 29, 33, 42, 50, 87, 99, 100, 101, 110, 167, 199, 200, 201, 276, 300, 314, 500, 610, 1000, 1039, 1347, 2309, 3098, 23501, 32767, 70000, 5480283, 2435489238, 234100090000, -42, -2001 ] TRANSLATORS = { 'US English' => USEnglishTranslator.new, 'Japanese' => JapaneseTranslator.new } # main TRANSLATORS.each do |lang, translator| puts puts lang puts '-' * lang.length SAMPLES.each do |val| puts "%12d => %s" % [val, val.to_spoken(translator)] end end end ferret-0.11.8.6/test/threading/thread_safety_test.rb0000755000004100000410000000652412476264460022443 0ustar www-datawww-datarequire File.dirname(__FILE__) + "/../test_helper" require File.join(File.dirname(__FILE__), "number_to_spoken.rb") require 'thread' class ThreadSafetyTest include Ferret::Index include Ferret::Search include Ferret::Store include Ferret def initialize(options) @options = options end INDEX_DIR = File.expand_path(File.join(File.dirname(__FILE__), "index")) ANALYZER = Ferret::Analysis::WhiteSpaceAnalyzer.new() ITERATIONS = 1000 QUERY_PARSER = Ferret::QueryParser.new(:analyzer => ANALYZER, :default_field => 'contents') @@searcher = nil def run_index_thread(writer) reopen_interval = 30 + rand(60) use_compound_file = false (400*ITERATIONS).times do |i| n = rand(0xFFFFFFFF) d = {:id => n.to_s, :contents => n.to_spoken} puts("Adding #{n}") # Switch between single and multiple file segments use_compound_file = (rand < 0.5) writer.use_compound_file = use_compound_file writer << d if (i % reopen_interval == 0) writer.close() writer = IndexWriter.new(:path => INDEX_DIR, :analyzer => ANALYZER) end end writer.close() rescue => e puts e puts e.backtrace raise e end def run_search_thread(use_global) reopen_interval = 10 + rand(20) unless use_global searcher = Searcher.new(INDEX_DIR) end (50*ITERATIONS).times do |i| search_for(rand(0xFFFFFFFF), (searcher.nil? ? @@searcher : searcher)) if (i%reopen_interval == 0) if (searcher == nil) @@searcher = Searcher.new(INDEX_DIR) else searcher.close() searcher = Searcher.new(INDEX_DIR) end end end rescue => e puts e puts e.backtrace raise e end def search_for(n, searcher) puts("Searching for #{n}") topdocs = searcher.search(QUERY_PARSER.parse(n.to_spoken), :limit => 3) puts("Search for #{n}: total = #{topdocs.total_hits}") topdocs.hits.each do |hit| puts "Hit for #{n}: #{searcher.reader[hit.doc]["id"]} - #{hit.score}" end end def run_test_threads threads = [] unless @options[:read_only] writer = IndexWriter.new(:path => INDEX_DIR, :analyzer => ANALYZER, :create => !@options[:add]) threads << Thread.new { run_index_thread(writer) } sleep(1) end threads << Thread.new { run_search_thread(false)} @@searcher = Searcher.new(INDEX_DIR) threads << Thread.new { run_search_thread(true)} threads << Thread.new { run_search_thread(true)} threads.each {|t| t.join} end end if $0 == __FILE__ require 'optparse' OPTIONS = { :all => false, :read_only => false, } ARGV.options do |opts| script_name = File.basename($0) opts.banner = "Usage: ruby #{script_name} [options]" opts.separator "" opts.on("-r", "--read-only", "Read Only.") { OPTIONS[:all] = true } opts.on("-a", "--all", "All.") { OPTIONS[:read_only] = true } opts.separator "" opts.on("-h", "--help", "Show this help message.") { puts opts; exit } opts.parse! end tst = ThreadSafetyTest.new(OPTIONS) tst.run_test_threads end ferret-0.11.8.6/test/threading/thread_safety_read_write_test.rb0000644000004100000410000000304512476264460024640 0ustar www-datawww-datarequire File.dirname(__FILE__) + "/../test_helper" require File.dirname(__FILE__) + "/number_to_spoken.rb" require 'thread' class IndexThreadSafetyReadWriteTest < Test::Unit::TestCase include Ferret::Index INDEX_DIR = File.expand_path(File.join(File.dirname(__FILE__), "index")) ITERATIONS = 10000 ANALYZER = Ferret::Analysis::Analyzer.new() def setup @index = Index.new(:path => INDEX_DIR, :create => true, :analyzer => ANALYZER, :default_field => :content) end def search_thread() ITERATIONS.times do do_search() sleep(rand(1)) end rescue => e puts e puts e.backtrace @index = nil raise e end def index_thread() ITERATIONS.times do do_add_doc() sleep(rand(1)) end rescue => e puts e puts e.backtrace @index = nil raise e end def do_add_doc n = rand(0xFFFFFFFF) d = {:id => n.to_s, :content => n.to_spoken} puts("Adding #{n}") begin @index << d rescue => e puts e puts e.backtrace @index = nil raise e end end def do_search n = rand(0xFFFFFFFF) puts("Searching for #{n}") hits = @index.search_each(n.to_spoken, :num_docs => 3) do |d, s| puts "Hit for #{n}: #{@index[d]["id"]} - #{s}" end puts("Searched for #{n}: total = #{hits}") end def test_threading threads = [] threads << Thread.new { search_thread } threads << Thread.new { index_thread } threads.each { |t| t.join } end end ferret-0.11.8.6/test/threading/thread_safety_index_test.rb0000644000004100000410000000410612476264460023621 0ustar www-datawww-data$:.unshift('.') require 'monitor' require File.dirname(__FILE__) + "/../test_helper" require File.dirname(__FILE__) + "/number_to_spoken.rb" require 'thread' class IndexThreadSafetyTest < Test::Unit::TestCase include Ferret::Index INDEX_DIR = File.expand_path(File.join(File.dirname(__FILE__), "index")) ITERATIONS = 100 NUM_THREADS = 3 ANALYZER = Ferret::Analysis::StandardAnalyzer.new() def setup index = Index.new(:path => INDEX_DIR, :create => true, :analyzer => ANALYZER, :default_field => :content) index.close end def indexing_thread() index = Index.new(:path => INDEX_DIR, :analyzer => ANALYZER, :auto_flush => true, :default_field => :content) ITERATIONS.times do choice = rand() if choice > 0.98 do_optimize(index) elsif choice > 0.7 do_delete_doc(index) elsif choice > 0.5 do_search(index) else do_add_doc(index) end index.commit end rescue Exception => e puts e puts e.backtrace raise 'hell' end def do_optimize(index) puts "Optimizing the index" index.optimize end def do_delete_doc(index) return if index.size == 0 doc_num = rand(index.size) puts "Deleting #{doc_num} from index which has#{index.has_deletions? ? "" : " no"} deletions" puts "document was already deleted" if (index.deleted?(doc_num)) index.delete(doc_num) end def do_add_doc(index) n = rand(0xFFFFFFFF) d = {:id => n, :content => n.to_spoken} puts("Adding #{n}") index << d end def do_search(index) n = rand(0xFFFFFFFF) puts("Searching for #{n}") hits = index.search_each(n.to_spoken, :num_docs => 3) do |d, s| puts "Hit for #{n}: #{index[d][:id]} - #{s}" end puts("Searched for #{n}: total = #{hits}") end def test_threading threads = [] NUM_THREADS.times do threads << Thread.new { indexing_thread } end threads.each {|t| t.join } end end ferret-0.11.8.6/test/test_helper.rb0000755000004100000410000000115012476264460017121 0ustar www-datawww-data$:.unshift File.dirname(__FILE__) if $test_installed_gem require 'rubygems' require 'ferret' else $:.unshift File.join(File.dirname(__FILE__), '../lib') $:.unshift File.join(File.dirname(__FILE__), '../ext') end ENV['LANG'] = "en_US.UTF-8" ENV['LC_CTYPE'] = "en_US.UTF-8" class Float def approx_eql?(o) return (1 - self/o).abs < 0.0001 end alias :=~ :approx_eql? end require 'test/unit' require 'ferret' require 'unit/index/th_doc' if (defined?(IndexTestHelper).nil?) def load_test_dir(dir) Dir[File.join(File.dirname(__FILE__), dir, "t[scm]*.rb")].each do |file| require file end end ferret-0.11.8.6/test/test_all.rb0000644000004100000410000000015712476264460016415 0ustar www-datawww-data$:.unshift File.dirname(__FILE__) require 'test_helper.rb' load_test_dir("unit") #load_test_dir("functional") ferret-0.11.8.6/test/test_installed.rb0000644000004100000410000000003312476264460017615 0ustar www-datawww-data$test_installed_gem = true ferret-0.11.8.6/test/unit/0000755000004100000410000000000012476264460015235 5ustar www-datawww-dataferret-0.11.8.6/test/unit/tc_field_symbol.rb0000644000004100000410000000131612476264460020721 0ustar www-datawww-datarequire File.dirname(__FILE__) + "/../test_helper" class FieldSymbolTest < Test::Unit::TestCase def test_field_symbol Ferret::FIELD_TYPES.each do |field_type| assert(:sym.respond_to?(field_type), "Symbol doesn't respond to #{field_type}") end %w(desc desc? type).each do |method| assert(:sym.respond_to?(method), "Symbol doesn't respond to #{method}") end assert_nil(:sym.type) assert(!:sym.desc?) assert(:sym.desc.desc?) assert(!:sym.desc.desc.desc?) Ferret::FIELD_TYPES.each do |field_type| assert_equal(field_type, :sym.__send__(field_type).type) end assert(:string, :sym.integer.byte.float.string.type.to_s) end end ferret-0.11.8.6/test/unit/index/0000755000004100000410000000000012476264460016344 5ustar www-datawww-dataferret-0.11.8.6/test/unit/index/th_doc.rb0000755000004100000410000002660612476264460020146 0ustar www-datawww-datamodule IndexTestHelper include Ferret::Index include Ferret::Analysis include Ferret::Search def IndexTestHelper.make_binary(size) tmp = Array.new(size) size.times {|i| tmp[i] = i%256 } return tmp.pack("c*") end BINARY_DATA = IndexTestHelper.make_binary(256) COMPRESSED_BINARY_DATA = IndexTestHelper.make_binary(56) def IndexTestHelper.prepare_document(dir) fis = FieldInfos.new fis.add_field(:text_field1, :term_vector => :no) fis.add_field(:text_field2) fis.add_field(:key_field, :index => :untokenized) fis.add_field(:unindexed_field, :index => :no) fis.add_field(:unstored_field1, :store => :no, :term_vector => :no) fis.add_field(:unstored_field2, :store => :no, :term_vector => :yes) fis.add_field(:compressed_field, :store => :compressed, :term_vector => :yes) fis.add_field(:binary_field, :index => :no, :term_vector => :no) fis.add_field(:compressed_binary_field, :store => :compressed, :index => :no, :term_vector => :no) doc = { :text_field1 => "field one text", :text_field2 => "field field field two text", :key_field => "keyword", :unindexed_field => "unindexed field text", :unstored_field1 => "unstored field text one", :unstored_field2 => "unstored field text two", :compressed_field => "compressed text", :binary_field => BINARY_DATA, :compressed_binary_field => COMPRESSED_BINARY_DATA } return doc, fis end def IndexTestHelper.prepare_documents [ ["apple", "green"], ["apple", "red"], ["orange", "orange"], ["grape", "green"], ["grape", "purple"], ["mandarin", "orange"], ["peach", "orange"], ["apricot", "orange"] ].map { |food| {"name" => food[0], "colour" => food[1]} } end def IndexTestHelper.prepare_book_list books = [ {"author" => "P.H. Newby", "title" => "Something To Answer For", "year" => "1969"}, {"author" => "Bernice Rubens", "title" => "The Elected Member", "year" => "1970"}, {"author" => "V. S. Naipaul", "title" => "In a Free State", "year" => "1971"}, {"author" => "John Berger", "title" => "G", "year" => "1972"}, {"author" => "J. G. Farrell", "title" => "The Siege of Krishnapur", "year" => "1973"}, {"author" => "Stanley Middleton", "title" => "Holiday", "year" => "1974"}, {"author" => "Nadine Gordimer", "title" => "The Conservationist", "year" => "1974"}, {"author" => "Ruth Prawer Jhabvala", "title" => "Heat and Dust", "year" => "1975"}, {"author" => "David Storey", "title" => "Saville", "year" => "1976"}, {"author" => "Paul Scott", "title" => "Staying On", "year" => "1977"}, {"author" => "Iris Murdoch", "title" => "The Sea", "year" => "1978"}, {"author" => "Penelope Fitzgerald", "title" => "Offshore", "year" => "1979"}, {"author" => "William Golding", "title" => "Rites of Passage", "year" => "1980"}, {"author" => "Salman Rushdie", "title" => "Midnight's Children", "year" => "1981"}, {"author" => "Thomas Keneally", "title" => "Schindler's Ark", "year" => "1982"}, {"author" => "J. M. Coetzee", "title" => "Life and Times of Michael K", "year" => "1983"}, {"author" => "Anita Brookner", "title" => "Hotel du Lac", "year" => "1984"}, {"author" => "Keri Hulme", "title" => "The Bone People", "year" => "1985"}, {"author" => "Kingsley Amis", "title" => "The Old Devils", "year" => "1986"}, {"author" => "Penelope Lively", "title" => "Moon Tiger", "year" => "1987"}, {"author" => "Peter Carey", "title" => "Oscar and Lucinda", "year" => "1988"}, {"author" => "Kazuo Ishiguro", "title" => "The Remains of the Day", "year" => "1989"}, {"author" => "A. S. Byatt", "title" => "Possession", "year" => "1990"}, {"author" => "Ben Okri", "title" => "The Famished Road", "year" => "1991"}, {"author" => "Michael Ondaatje", "title" => "The English Patient", "year" => "1992"}, {"author" => "Barry Unsworth", "title" => "Sacred Hunger", "year" => "1992"}, {"author" => "Roddy Doyle", "title" => "Paddy Clarke Ha Ha Ha", "year" => "1993"}, {"author" => "James Kelman", "title" => "How Late It Was, How Late", "year" => "1994"}, {"author" => "Pat Barker", "title" => "The Ghost Road", "year" => "1995"}, {"author" => "Graham Swift", "title" => "Last Orders", "year" => "1996"}, {"author" => "Arundati Roy", "title" => "The God of Small Things", "year" => "1997"}, {"author" => "Ian McEwan", "title" => "Amsterdam", "year" => "1998"}, {"author" => "J. M. Coetzee", "title" => "Disgrace", "year" => "1999"}, {"author" => "Margaret Atwood", "title" => "The Blind Assassin", "year" => "2000"}, {"author" => "Peter Carey", "title" => "True History of the Kelly Gang", "year" => "2001"}, {"author" => "Yann Martel", "title" => "The Life of Pi", "year" => "2002"}, {"author" => "DBC Pierre", "title" => "Vernon God Little", "year" => "2003"} ] end def self.prepare_ir_test_fis fis = FieldInfos.new fis.add_field(:body) fis.add_field(:changing_field, :term_vector => :no) fis.add_field(:title, :index => :untokenized, :term_vector => :with_offsets) fis.add_field(:author, :term_vector => :with_positions) fis.add_field(:year, :index => :no, :term_vector => :no) fis.add_field(:text, :store => :no, :term_vector => :no) end INDEX_TEST_DOC_COUNT = 64 def self.prepare_ir_test_docs docs = [] docs[0] = { :body => "Where is Wally", :changing_field => "word3 word4 word1 word2 word1 word3 word4 word1 " + "word3 word3", } docs[1] = { :body => "Some Random Sentence read" } docs[2] = { :body => "Some read Random Sentence read" } docs[3] = { :title => "War And Peace", :body => "word3 word4 word1 word2 word1 word3 word4 word1 word3 word3", :author => "Leo Tolstoy", :year => "1865", :text => "more text which is not stored" } docs[4] = { :body => "Some Random Sentence" } docs[5] = { :body => "Here's Wally" } docs[6] = { :body => "Some Random Sentence read read read read" } docs[7] = { :body => "Some Random Sentence" } docs[8] = { :body => "Some Random Sentence" } docs[9] = { :body => "read Some Random Sentence read this will be used after " + "unfinished next position read" } docs[10] = { :body => "Some read Random Sentence", :changing_field => "word3 word4 word1 word2 word1 word3 word4 word1 " + "word3 word3" } docs[11] = { :body => "And here too. Well, maybe Not" } docs[12] = { :body => "Some Random Sentence" } docs[13] = { :body => "Some Random Sentence" } docs[14] = { :body => "Some Random Sentence" } docs[15] = { :body => "Some Random Sentence" } docs[16] = { :body => "Some Random read read Sentence" } docs[17] = { :body => "Some Random read Sentence", :changing_field => "word3 word4 word1 word2 word1 word3 word4 word1 " + "word3 word3" } docs[18] = { :body => "Wally Wally Wally" } docs[19] = { :body => "Some Random Sentence", :changing_field => "word3 word4 word1 word2 word1 word3 word4 word1 " + "word3 word3" } docs[20] = { :body => "Wally is where Wally usually likes to go. Wally Mart! Wally " + "likes shopping there for Where's Wally books. Wally likes " + "to read", :changing_field => "word3 word4 word1 word2 word1 word3 word4 word1 " + "word3 word3" } docs[21] = { :body => "Some Random Sentence read read read and more read read read", :changing_field => "word3 word4 word1 word2 word1 word3 word4 word1 " + "word3 word3" } buf = "" 21.times { buf << "skip " } 22.upto(INDEX_TEST_DOC_COUNT-1) do |i| buf << "skip " docs[i] = {:text => buf.clone} end return docs end INDEX_TEST_DOCS = self.prepare_ir_test_docs() INDEX_TEST_FIS = self.prepare_ir_test_fis() def self.prepare_search_docs i = 1 [ ["20050930", "cat1/", 0.123, "word1" ], ["20051001", "cat1/sub1", 0.954, "word1 word2 the quick brown fox" ], ["20051002", "cat1/sub1/subsub1", 908.125, "word1 word3" ], ["20051003", "cat1/sub2", 3999, "word1 word3" ], ["20051004", "cat1/sub2/subsub2", "+.3412", "word1 word2" ], ["20051005", "cat2/sub1", -1.298, "word1" ], ["20051006", "cat2/sub1", "2", "word1 word3" ], ["20051007", "cat2/sub1", "+8.894", "word1" ], ["20051008", "cat2/sub1", "+21235.2135", "word1 word2 word3 the fast brown fox" ], ["20051009", "cat3/sub1", "10.0", "word1" ], ["20051010", "cat3/sub1", 1, "word1" ], ["20051011", "cat3/sub1", -12518419, "word1 word3 the quick red fox" ], ["20051012", "cat3/sub1", "10", "word1" ], ["20051013", "cat1/sub2", "15682954", "word1" ], ["20051014", "cat1/sub1", "91239", "word1 word3 the quick hairy fox" ], ["20051015", "cat1/sub2/subsub1", "-.89321", "word1" ], ["20051016", "cat1/sub1/subsub2", -89, "word1 the quick fox is brown and hairy and a little red" ], ["20051017", "cat1/", "-1.0", "word1 the brown fox is quick and red" ] ].map do |date, category, number, field| doc = Ferret::Document.new(i) i += 1 doc[:date] = date doc[:category] = category doc[:field] = field doc[:number] = number doc end end SEARCH_TEST_DOCS = self.prepare_search_docs() end ferret-0.11.8.6/test/unit/index/tc_index.rb0000644000004100000410000007177512476264460020507 0ustar www-datawww-datarequire File.dirname(__FILE__) + "/../../test_helper" class IndexTest < Test::Unit::TestCase include Ferret::Index include Ferret::Search include Ferret::Analysis include Ferret::Store def setup() end def teardown() end def check_results(index, query, expected) cnt = 0 #puts "#{query} - #{expected.inspect}" #puts index.size index.search_each(query) do |doc, score| #puts "doc-#{doc} score=#{score}" assert_not_nil(expected.index(doc), "doc #{doc} found but not expected") cnt += 1 end assert_equal(expected.length, cnt) end def do_test_index_with_array(index) [ ["one two"], ["one", "three"], ["two"], ["one", "four"], ["one two"], ["two", "three", "four"], ["one"], ["two", "three", "four", "five"] ].each {|doc| index << doc } assert_equal(8, index.size) q = "one" check_results(index, q, [0, 1, 3, 4, 6]) q = "one AND two" check_results(index, q, [0, 4]) q = "one OR five" check_results(index, q, [0, 1, 3, 4, 6, 7]) assert_equal(%w{two three four five}, index.doc(7)[:xxx]) end def do_test_index_with_hash(index) data = [ {:xxx => "one two"}, {:xxx => "one", :field2 => "three"}, {:xxx => "two"}, {:xxx => "one", :field2 => "four"}, {:xxx => "one two"}, {:xxx => "two", :field2 => "three", :field3 => "four"}, {:xxx => "one"}, {:xxx => "two", :field2 => "three", :field3 => "five"} ] data.each {|doc| index << doc } q = "one AND two" check_results(index, q, [0, 4]) q = "one OR five" check_results(index, q, [0, 1, 3, 4, 6]) q = "one OR field3:five" check_results(index, q, [0, 1, 3, 4, 6, 7]) assert_equal("four", index[5]["field3"]) q = "field3:f*" check_results(index, q, [5, 7]) q = "*:(one AND NOT three)" check_results(index, q, [0, 3, 4, 6]) q = "*:(one AND (NOT three))" check_results(index, q, [0, 3, 4, 6]) q = "two AND field3:f*" check_results(index, q, [5, 7]) assert_equal("five", index.doc(7)["field3"]) assert_equal("two", index.doc(7)[:xxx]) end def do_test_index_with_doc_array(index) data = [ {:xxx => "one two multi", :id => "myid"}, {:xxx => "one", :field2 => "three multi"}, {:xxx => "two"}, {:xxx => "one", :field2 => "four"}, {:xxx => "one two"}, {:xxx => "two", :field2 => "three", :field3 => "four"}, {:xxx => "one multi2", :id => "hello"}, {:xxx => "two", :field2 => "this three multi2", :field3 => "five multi"} ] data.each {|doc| index << doc } q = "one AND two" check_results(index, q, [0, 4]) q = "one OR five" check_results(index, q, [0, 1, 3, 4, 6]) q = "one OR field3:five" check_results(index, q, [0, 1, 3, 4, 6, 7]) q = "two AND (field3:f*)" check_results(index, q, [5, 7]) q = "*:(multi OR multi2)" check_results(index, q, [0, 1, 6, 7]) q = "field2|field3:(multi OR multi2)" check_results(index, q, [1, 7]) doc = index[5] assert_equal("three", index[5]["field2"]) assert(!index.has_deletions?) assert(!index.deleted?(5)) assert_equal(8, index.size) index.delete(5) assert(index.has_deletions?) assert(index.deleted?(5)) assert_equal(7, index.size) q = "two AND (field3:f*)" check_results(index, q, [7]) doc.load doc[:field2] = "dave" index << doc check_results(index, q, [7, 8]) check_results(index, "*:this", []) assert_equal(8, index.size) assert_equal("dave", index[8][:field2]) index.optimize check_results(index, q, [6, 7]) assert_equal("dave", index[7][:field2]) index.query_delete("field2:three") assert(index.deleted?(1)) assert(index.deleted?(6)) assert(! index.deleted?(7)) assert_equal("one multi2", index["hello"][:xxx]) assert_equal("one two multi", index["myid"][:xxx]) index.delete("myid") assert(index.deleted?(0)) end def test_ram_index index = Ferret::I.new(:default_input_field => :xxx) do_test_index_with_array(index) index.close index = Index.new(:default_field => :xxx) do_test_index_with_hash(index) index.close index = Index.new(:default_field => :xxx, :id_field => :id) do_test_index_with_doc_array(index) index.close end def test_fs_index fs_path = File.expand_path(File.join(File.dirname(__FILE__), '../../temp/fsdir')) Dir[File.join(fs_path, "*")].each {|path| begin File.delete(path) rescue nil end} assert_raise(Ferret::FileNotFoundError) do Index.new(:path => fs_path, :create_if_missing => false, :default_field => :xxx) end index = Index.new(:path => fs_path, :default_input_field => :xxx) do_test_index_with_array(index) index.close Dir[File.join(fs_path, "*")].each {|path| begin File.delete(path) rescue nil end} index = Index.new(:path => fs_path, :default_field => :xxx) do_test_index_with_hash(index) index.close Dir[File.join(fs_path, "*")].each {|path| begin File.delete(path) rescue nil end} index = Index.new(:path => fs_path, :default_field => :xxx, :id_field => "id") do_test_index_with_doc_array(index) index.close end def test_fs_index_is_persistant fs_path = File.expand_path(File.join(File.dirname(__FILE__), '../../temp/fsdir')) index = Index.new(:path => fs_path, :default_field => :xxx, :create => true) [ {:xxx => "one two", :id => "me"}, {:xxx => "one", :field2 => "three"}, {:xxx => "two"}, {:xxx => "one", :field2 => "four"}, {:xxx => "one two"}, {:xxx => "two", :field2 => "three", :field3 => "four"}, {:xxx => "one"}, {:xxx => "two", :field2 => "three", :field3 => "five"} ].each {|doc| index << doc } assert_equal(8, index.size) index.close index = Index.new(:path => fs_path, :create_if_missing => false) assert_equal(8, index.size) assert_equal("four", index[5]["field3"]) index.close end def test_key_used_for_id_field fs_path = File.expand_path(File.join(File.dirname(__FILE__), '../../temp/fsdir')) index = Index.new(:path => fs_path, :key => :my_id, :create => true) [ {:my_id => "three", :id => "me"}, {:my_id => "one", :field2 => "three"}, {:my_id => "two"}, {:my_id => "one", :field2 => "four"}, {:my_id => "three"}, {:my_id => "two", :field2 => "three", :field3 => "four"}, {:my_id => "one"}, {:my_id => "two", :field2 => "three", :field3 => "five"} ].each {|doc| index << doc } index.optimize assert_equal(3, index.size) assert_equal("three", index["two"][:field2]) index.close end def test_merging_indexes index1 = Index.new(:default_field => :f) index2 = Index.new(:default_field => :f) index3 = Index.new(:default_field => :f) [ {:f => "zero"}, {:f => "one"}, {:f => "two"} ].each {|doc| index1 << doc } [ {:f => "three"}, {:f => "four"}, {:f => "five"} ].each {|doc| index2 << doc } [ {:f => "six"}, {:f => "seven"}, {:f => "eight"} ].each {|doc| index3 << doc } index = Index.new(:default_field => :f) index.add_indexes(index1) assert_equal(3, index.size) assert_equal("zero", index[0][:f]) index.add_indexes([index2, index3]) assert_equal(9, index.size) assert_equal("zero", index[0][:f]) assert_equal("eight", index[8][:f]) index1.close index2.close index3.close assert_equal("seven", index[7][:f]) data = [ {:f => "alpha"}, {:f => "beta"}, {:f => "charlie"} ] dir1 = RAMDirectory.new index1 = Index.new(:dir => dir1, :default_field => :f) data.each {|doc| index1 << doc } index1.flush data = [ {:f => "delta"}, {:f => "echo"}, {:f => "foxtrot"} ] dir2 = RAMDirectory.new index2 = Index.new(:dir => dir2, :default_field => :f) data.each {|doc| index2 << doc } index2.flush data = [ {:f => "golf"}, {:f => "india"}, {:f => "juliet"} ] dir3 = RAMDirectory.new index3 = Index.new(:dir => dir3, :default_field => :f) data.each {|doc| index3 << doc } index3.flush index.add_indexes(dir1) assert_equal(12, index.size) assert_equal("alpha", index[9][:f]) index.add_indexes([dir2, dir3]) assert_equal(18, index.size) assert_equal("juliet", index[17][:f]) index1.close dir1.close index2.close dir2.close index3.close dir3.close assert_equal("golf", index[15][:f]) index.close end def test_persist_index data = [ {:f => "zero"}, {:f => "one"}, {:f => "two"} ] index = Index.new(:default_field => :f) data.each {|doc| index << doc } fs_path = File.expand_path(File.join(File.dirname(__FILE__), '../../temp/fsdir')) index.persist(fs_path, true) assert_equal(3, index.size) assert_equal("zero", index[0][:f]) index.close index = Index.new(:path => fs_path) assert_equal(3, index.size) assert_equal("zero", index[0][:f]) index.close data = [ {:f => "romeo"}, {:f => "sierra"}, {:f => "tango"} ] index = Index.new(:default_field => :f) data.each {|doc| index << doc } assert_equal(3, index.size) assert_equal("romeo", index[0][:f]) dir = FSDirectory.new(fs_path, false) index.persist(dir) assert_equal(6, index.size) assert_equal("zero", index[0][:f]) assert_equal("romeo", index[3][:f]) index.close index = Index.new(:path => fs_path) assert_equal(6, index.size) assert_equal("zero", index[0][:f]) assert_equal("romeo", index[3][:f]) index.close end def test_auto_update_when_externally_modified() fs_path = File.expand_path(File.join(File.dirname(__FILE__), '../../temp/fsdir')) index = Index.new(:path => fs_path, :default_field => :f, :create => true) index << "document 1" assert_equal(1, index.size) index2 = Index.new(:path => fs_path, :default_field => :f) assert_equal(1, index2.size) index2 << "document 2" assert_equal(2, index2.size) assert_equal(2, index.size) top_docs = index.search("content3") assert_equal(0, top_docs.hits.size) iw = IndexWriter.new(:path => fs_path, :analyzer => WhiteSpaceAnalyzer.new) iw << {:f => "content3"} iw.close() top_docs = index.search("content3") assert_equal(1, top_docs.hits.size) assert_equal(3, index.size) assert_equal("content3", index[2][:f]) index2.close index.close end def test_delete index = Index.new(:analyzer => WhiteSpaceAnalyzer.new) data = [ {:id => 0, :cat => "/cat1/subcat1"}, {:id => 1, :cat => "/cat1/subcat2"}, {:id => 2, :cat => "/cat1/subcat2"}, {:id => 3, :cat => "/cat1/subcat3"}, {:id => 4, :cat => "/cat1/subcat4"}, {:id => 5, :cat => "/cat2/subcat1"}, {:id => 6, :cat => "/cat2/subcat2"}, {:id => 7, :cat => "/cat2/subcat3"}, {:id => 8, :cat => "/cat2/subcat4"}, {:id => 9, :cat => "/cat2/subcat5"}, ].each {|doc| index << doc } assert_equal(10, index.size) assert_equal(1, index.search("id:9").total_hits) index.delete(9) assert_equal(9, index.size) assert_equal(0, index.search("id:9").total_hits) assert_equal(1, index.search("id:8").total_hits) index.delete("8") assert_equal(8, index.size) assert_equal(0, index.search("id:8").total_hits) assert_equal(5, index.search("cat:/cat1*").total_hits) index.query_delete("cat:/cat1*") assert_equal(3, index.size) assert_equal(0, index.search("cat:/cat1*").total_hits) index.close end def test_update index = Index.new(:analyzer => WhiteSpaceAnalyzer.new, :default_input_field => :content, :id_field => :id) data = [ {:id => 0, :cat => "/cat1/subcat1", :content => "content0"}, {:id => 1, :cat => "/cat1/subcat2", :content => "content1"}, {:id => 2, :cat => "/cat1/subcat2", :content => "content2"}, {:id => 3, :cat => "/cat1/subcat3", :content => "content3"}, {:id => 4, :cat => "/cat1/subcat4", :content => "content4"}, {:id => 5, :cat => "/cat2/subcat1", :content => "content5"}, {:id => 6, :cat => "/cat2/subcat2", :content => "content6"}, {:id => 7, :cat => "/cat2/subcat3", :content => "content7"}, {:id => 8, :cat => "/cat2/subcat4", :content => "content8"}, {:id => 9, :cat => "/cat2/subcat5", :content => "content9"}, ].each { |doc| index << doc } assert_equal(10, index.size) assert_equal("content5", index["5"][:content]) index.query_update("id:5", {:content => "content five"}) assert_equal("content five", index["5"][:content]) assert_equal(nil, index["5"][:extra_content]) index.update("5", {:id => "5", :cat => "/cat1/subcat6", :content => "high five", :extra_content => "hello"}) assert_equal("hello", index["5"][:extra_content]) assert_equal("high five", index["5"][:content]) assert_equal("/cat1/subcat6", index["5"][:cat]) assert_equal("content9", index["9"][:content]) index.query_update("content:content9", {:content => "content nine"}) assert_equal("content nine", index["9"][:content]) assert_equal("content0", index["0"][:content]) assert_equal(nil, index["0"][:extra_content]) document = index[0].load document[:content] = "content zero" document[:extra_content] = "extra content" index.update(0, document) assert_equal("content zero", index["0"][:content]) assert_equal("extra content", index["0"][:extra_content]) assert_equal(nil, index["1"][:tag]) assert_equal(nil, index["2"][:tag]) assert_equal(nil, index["3"][:tag]) assert_equal(nil, index["4"][:tag]) index.query_update("id:<5 AND cat:>=/cat1/subcat2", {:tag => "cool"}) assert_equal("cool", index["1"][:tag]) assert_equal("cool", index["2"][:tag]) assert_equal("cool", index["3"][:tag]) assert_equal("cool", index["4"][:tag]) assert_equal(4, index.search("tag:cool").total_hits) index.close end def test_index_key data = [ {:id => 0, :val => "one"}, {:id => 0, :val => "two"}, {:id => 1, :val => "three"}, {:id => 1, :val => "four"}, ] index = Index.new(:analyzer => WhiteSpaceAnalyzer.new, :key => :id) data.each { |doc| index << doc } assert_equal(2, index.size) assert_equal("two", index["0"][:val]) assert_equal("four", index["1"][:val]) index.close end def test_index_key_batch0 data = { "0" => {:id => "0", :val => "one"}, "0" => {:id => "0", :val => "two"}, "1" =>{:id => "1", :val => "three"}, "1" => {:id => "1", :val => "four"}, } index = Index.new(:analyzer => WhiteSpaceAnalyzer.new, :key => :id) index.batch_update data assert_equal(2, index.size) index.close end def test_index_key_batch1 data0 = { "0" => {:id => "0", :val => "one"}, "0" => {:id => "0", :val => "two"}, "1" =>{:id => "1", :val => "three"}, "2" => {:id => "1", :val => "four"}, } data1 = { "0" => {:id => "0", :val => "one"}, "3" => {:id => "3", :val => "two"}, "2" =>{:id => "2", :val => "three"}, "1" => {:id => "1", :val => "four"}, "4" => {:id => "4", :val => "four"}, } index = Index.new(:analyzer => WhiteSpaceAnalyzer.new, :key => :id) index.batch_update data0 assert_equal(3, index.size) index.batch_update data1 assert_equal(5, index.size) index.close end def test_index_key_delete_batch0 data0 = { "0" => {:id => "0", :val => "one"}, "0" => {:id => "0", :val => "two"}, "1" =>{:id => "1", :val => "three"}, "2" => {:id => "2", :val => "four"}, "0" => {:id => "0", :val => "four"}, } data1 = ["0", "1"]; index = Index.new(:analyzer => WhiteSpaceAnalyzer.new, :key => :id) index.batch_update data0 assert_equal("four", index["0"][:val]) assert_equal("three", index["1"][:val]) assert_equal("four", index["2"][:val]) assert_equal(3, index.size) index.delete data1 assert_equal(1, index.size) assert_equal("four", index["2"][:val]) index.close end def test_index_key_delete_batch0 index = Index.new(:analyzer => WhiteSpaceAnalyzer.new) 1000.times {|i| index << {:id => "#{i}", :content => "content #{i}"}} assert_equal(1000, index.size) assert_equal("content 876", index['876'][:content]) new_docs = Array.new(1000) {|i| {:id => i, :content => "#{i} > content"}} index.batch_update(new_docs) assert_equal(1000, index.size) assert_equal("128 > content", index['128'][:content]) new_docs = Array.new(1000) {|i| {:id => i.to_s, :content => "_(#{i})_"}} index.batch_update(new_docs) assert_equal(1000, index.size) assert_equal("_(287)_", index['287'][:content]) new_docs = {} 1000.times {|i| new_docs[i.to_s] = {:id => i, :content => "Hash(#{i})"}} index.batch_update(new_docs) assert_equal(1000, index.size) assert_equal("Hash(78)", index['78'][:content]) end def test_index_multi_key index = Index.new(:analyzer => WhiteSpaceAnalyzer.new, :key => [:id, :table]) data = [ {:id => 0, :table => "product", :product => "tent"}, {:id => 0, :table => "location", :location => "first floor"}, {:id => 0, :table => "product", :product => "super tent"}, {:id => 0, :table => "location", :location => "second floor"}, {:id => 1, :table => "product", :product => "backback"}, {:id => 1, :table => "location", :location => "second floor"}, {:id => 1, :table => "location", :location => "first floor"}, {:id => 1, :table => "product", :product => "rucksack"}, {:id => 1, :table => "product", :product => "backpack"} ].each { |doc| index << doc } index.optimize assert_equal(4, index.size) assert_equal("super tent", index[0][:product]) assert_equal("second floor", index[1][:location]) assert_equal("backpack", index[3][:product]) assert_equal("first floor", index[2][:location]) index.close end def test_index_multi_key_untokenized field_infos = FieldInfos.new(:term_vector => :no) field_infos.add_field(:id, :index => :untokenized) field_infos.add_field(:table, :index => :untokenized) index = Index.new(:analyzer => Analyzer.new, :key => [:id, :table], :field_infos => field_infos) data = [ {:id => 0, :table => "Product", :product => "tent"}, {:id => 0, :table => "location", :location => "first floor"}, {:id => 0, :table => "Product", :product => "super tent"}, {:id => 0, :table => "location", :location => "second floor"}, {:id => 1, :table => "Product", :product => "backback"}, {:id => 1, :table => "location", :location => "second floor"}, {:id => 1, :table => "location", :location => "first floor"}, {:id => 1, :table => "Product", :product => "rucksack"}, {:id => 1, :table => "Product", :product => "backpack"} ].each {|doc| index << doc} assert_equal(4, index.size) index.optimize assert_equal("super tent", index[0][:product]) assert_equal("second floor", index[1][:location]) assert_equal("backpack", index[3][:product]) assert_equal("first floor", index[2][:location]) index.close end def test_sortby_date index = Index.new(:analyzer => WhiteSpaceAnalyzer.new) data = [ {:content => "one", :date => "20051023"}, {:content => "two", :date => "19530315"}, {:content => "three four", :date => "19390912"}, {:content => "one", :date => "19770905"}, {:content => "two", :date => "19810831"}, {:content => "three", :date => "19790531"}, {:content => "one", :date => "19770725"}, {:content => "two", :date => "19751226"}, {:content => "four", :date => "19390912"} ].each {|doc| index << doc} sf_date = SortField.new("date", {:type => :integer}) #top_docs = index.search("one", :sort => [sf_date, SortField::SCORE]) top_docs = index.search("one", :sort => Sort.new("date")) assert_equal(3, top_docs.total_hits) assert_equal("19770725", index[top_docs.hits[0].doc][:date]) assert_equal("19770905", index[top_docs.hits[1].doc][:date]) assert_equal("20051023", index[top_docs.hits[2].doc][:date]) top_docs = index.search("one two three four", :sort => [sf_date, SortField::SCORE]) assert_equal("19390912", index[top_docs.hits[0].doc][:date]) assert_equal("three four", index[top_docs.hits[0].doc][:content]) assert_equal("19390912", index[top_docs.hits[1].doc][:date]) assert_equal("four", index[top_docs.hits[1].doc][:content]) assert_equal("19530315", index[top_docs.hits[2].doc][:date]) top_docs = index.search("one two three four", :sort => [:date, :content]) assert_equal("19390912", index[top_docs.hits[0].doc][:date]) assert_equal("four", index[top_docs.hits[0].doc][:content]) assert_equal("19390912", index[top_docs.hits[1].doc][:date]) assert_equal("three four", index[top_docs.hits[1].doc][:content]) assert_equal("19530315", index[top_docs.hits[2].doc][:date]) index.close end # this test has been corrected to work as intended # it now fails the same way on both 1.8 and 1.9 -- sds def test_auto_flush fs_path = File.expand_path(File.join(File.dirname(__FILE__), '../../temp/fsdir')) Dir[File.join(fs_path, "*")].each {|path| begin File.delete(path) rescue nil end} data = %w(one two three four five six seven eight nine ten eleven twelve) index1 = Index.new(:path => fs_path, :auto_flush => true, :key => :id) index1 << {:id => 0, :content => "zero"} index2 = Index.new(:path => fs_path, :auto_flush => true) begin n = 1 data.each do |datum| index1 << {:id => n, :content => datum} index2 << {:id => n, :content => datum} n += 1 end 5.times do |i| index1.delete(i) index2.delete(i + 5) end index1.optimize index2 << "thirteen" rescue Exception => e assert(false, "This should not cause an error when auto flush has been set") end index1.close index2.close end def test_doc_specific_analyzer index = Index.new index.add_document("abc", Ferret::Analysis::Analyzer.new) assert_equal(1, index.size) end def test_adding_empty_term_vectors index = Index.new(:field_infos => FieldInfos.new(:term_vector => :no)) # Note: Adding keywords to either field1 or field2 gets rid of the error index << {:field1 => ''} index << {:field2 => ''} index << {:field3 => 'foo bar baz'} index.flush index.close end def test_stopwords field_infos = FieldInfos.new(:store => :no, :term_vector => :no) field_infos.add_field(:id, :store => :yes, :index => :untokenized) i = Ferret::Index::Index.new(:or_default => false, :default_search_field => '*') # adding this additional field to the document leads to failure below # comment out this statement and all tests pass: i << {:id => 1, :content => "Move or shake"} hits = i.search 'move nothere shake' assert_equal 0, hits.total_hits hits = i.search 'move shake' assert_equal 1, hits.total_hits hits = i.search 'move or shake' assert_equal 1, hits.total_hits # fails when id field is present end def test_threading path = File.expand_path(File.join(File.dirname(__FILE__), '../../temp/fsdir')) index = Ferret::Index::Index.new(:path => path, :create => true) 100.times do |i| buf = '' doc = {} doc[:id] = i doc[:foo] = "foo #{i}" index << doc end threads = [] 4.times do threads << Thread.new(index) do |index| result = index.search('id:42') assert_equal(1, result.total_hits) end end threads.each{|t| t.join } end def test_wildcard j = nil Ferret::I.new do |i| i << "one" assert_equal(1, i.search("*").total_hits) i << "two" assert_equal(2, i.search("*").total_hits) i << {:content => "three"} assert_equal(3, i.search("*").total_hits) assert_equal(3, i.search("id:*").total_hits) assert_equal(2, i.search('id:?*').total_hits) j = i end assert_raise(StandardError) {j.close} end def check_highlight(index, q, excerpt_length, num_excerpts, expected, field = :field) highlights = index.highlight(q, 0, :excerpt_length => excerpt_length, :num_excerpts => num_excerpts, :field => field) assert_equal(expected, highlights) highlights = index.highlight(q, 1, :excerpt_length => excerpt_length, :num_excerpts => num_excerpts, :field => field) assert_equal(expected, highlights) end def test_highlighter() index = Ferret::I.new(:default_field => :field, :default_input_field => :field, :analyzer => Ferret::Analysis::WhiteSpaceAnalyzer.new) [ "the words we are searching for are one and two also " + "sometimes looking for them as a phrase like this; one " + "two lets see how it goes", [ "the words we", "are searching", "for are one", "and two also", "sometimes looking", "for them as a", "phrase like this;", "one two lets see", "how it goes" ] ].each {|doc| index << doc } check_highlight(index, "one", 10, 1, ["...are one..."]) check_highlight(index, "one", 10, 2, ["...are one...","...this; one..."]) check_highlight(index, "one", 10, 3, ["the words...","...are one...","...this; one..."]) check_highlight(index, "one", 10, 4, ["the words we are...","...are one...","...this; one..."]) check_highlight(index, "one", 10, 5, ["the words we are searching for are one...","...this; one..."]) check_highlight(index, "one", 10, 20, ["the words we are searching for are one and two also " + "sometimes looking for them as a phrase like this; one " + "two lets see how it goes"]) check_highlight(index, "one", 200, 1, ["the words we are searching for are one and two also " + "sometimes looking for them as a phrase like this; one " + "two lets see how it goes"]) check_highlight(index, "(one two)", 15, 2, ["...one and two...","...this; one two..."]) check_highlight(index, 'one two "one two"', 15, 2, ["...one and two...","...this; one two..."]) check_highlight(index, 'one two "one two"', 15, 1, ["...this; one two..."]) check_highlight(index, '"one two"', 15, 1, nil, :not_a_field) check_highlight(index, 'wrong_field:one', 15, 1, nil, :wrong_field) check_highlight(index, '"the words" "for are one and two" words one two', 10, 1, ["the words..."]) check_highlight(index, '"the words" "for are one and two" words one two', 20, 2, ["the words we are...","...for are one and two..."]) index.close end def test_changing_analyzer index = Ferret::I.new a = Ferret::Analysis::WhiteSpaceAnalyzer.new(false) index.add_document({:content => "Content With Capitals"}, a) tv = index.reader.term_vector(0, :content) assert_equal("Capitals", tv.terms[0].text) index.close end def test_top_doc_to_json index = Ferret::I.new [ {:f1 => "one"}, {:f2 => ["two",2,2.0]}, {:f3 => 3}, {:f4 => 4.0}, {:f5 => "five", :funny => '"' * 10_000} ].each {|doc| index << doc} json_str = index.search("one two 3 4.0 five", :sort => Ferret::Search::Sort::INDEX_ORDER).to_json assert(json_str == '[{"f1":"one"},{"f2":["two","2","2.0"]},{"f3":"3"},{"f4":"4.0"},{"f5":"five","funny":"' + '\'"\'' * 10_000 + '"}]' || json_str == '[{"f1":"one"},{"f2":["two","2","2.0"]},{"f3":"3"},{"f4":"4.0"},{"funny":"' + '\'"\'' * 10_000 + '","f5":"five"}]') assert_equal('[]', index.search("xxx").to_json) index.close end def test_large_query_delete index = Ferret::I.new 20.times do index << {:id => 'one'} index << {:id => 'two'} end index.query_delete('id:one') assert_equal(20, index.size) end def test_query_update_delete_more_than_ten index = Ferret::I.new 20.times {|i| index << {:id => i, :find => 'match', :change => 'one'} } assert_equal(20, index.search('find:match').total_hits) index.query_update('find:match', {:change => 'two'}) assert_equal(20, index.search('find:match AND change:two').total_hits) index.query_delete('find:match') assert_equal(0, index.size) end end ferret-0.11.8.6/test/unit/index/tc_index_writer.rb0000644000004100000410000006636212476264460022077 0ustar www-datawww-data# encoding: utf-8 require File.dirname(__FILE__) + "/../../test_helper" class IndexWriterTest < Test::Unit::TestCase include Ferret::Index include Ferret::Analysis def setup() @dir = Ferret::Store::RAMDirectory.new fis = FieldInfos.new() fis.create_index(@dir) end def teardown() @dir.close() end def test_initialize wlock = @dir.make_lock(IndexWriter::WRITE_LOCK_NAME) clock = @dir.make_lock(IndexWriter::COMMIT_LOCK_NAME) assert(! wlock.locked?) assert(! clock.locked?) iw = IndexWriter.new(:dir => @dir, :create => true) assert(@dir.exists?("segments")) assert(wlock.locked?) iw.close() assert(@dir.exists?("segments")) assert(! wlock.locked?) assert(! clock.locked?) end def test_add_document iw = IndexWriter.new(:dir => @dir, :analyzer => StandardAnalyzer.new(), :create => true) iw << {:title => "first doc", :content => ["contents of", "first doc"]} assert_equal(1, iw.doc_count) iw << ["contents of", "second doc"] assert_equal(2, iw.doc_count) iw << "contents of third doc" assert_equal(3, iw.doc_count) iw.close() end def test_add_documents_fuzzy iw = IndexWriter.new(:dir => @dir, :analyzer => StandardAnalyzer.new()) iw.merge_factor = 3 iw.max_buffered_docs = 3 # add 100 documents 100.times do doc = random_doc() iw.add_document(doc) end assert_equal(100, iw.doc_count) iw.close() end def test_adding_long_url iw = IndexWriter.new(:dir => @dir, :default_field => 'content') iw << {:content => "http://" + 'x' * 255} # The following line will cause a segfault prior to 0.11.6 iw << {:content => "http://" + 'x' * 1_000_000} end private WORDS = [ "desirous", "hollowness's", "camp's", "Senegal", "broadcaster's", "pecking", "Provence", "paternalism", "premonition", "Dumbo's", "Darlene's", "Elbert's", "substrate", "Camille", "Menkalinan", "Cooper", "decamps", "abatement's", "bindings", "scrubby", "subset", "ancestor's", "pelagic", "abscissa", "loofah's", "gleans", "boudoir", "disappointingly", "guardianship's", "settlers", "Mylar", "timetable's", "parabolic", "madams", "bootlegger's", "monotonically", "gage", "Karyn's", "deposed", "boozy", "swordfish's", "Chevron", "Victrola", "Tameka", "impels", "carrels", "salami's", "celibate", "resistance's", "duration", "abscissae", "Kilroy's", "corrosive", "flight's", "flapper", "scare", "peppiest", "Pygmies", "Menzies", "wrist's", "enumerable", "housecoats", "Khwarizmi's", "stampeding", "hungering", "steeping", "Yemenis", "entangles", "solver", "mishapping", "Rand's", "ninety", "Boris", "impedimenta", "predators", "ridge", "wretchedness's", "crapping", "Head", "Edwards", "Claude's", "geodesics", "verities", "botch", "Short's", "vellum's", "coruscates", "hydrogenates", "Haas's", "deceitfulness", "cohort's", "Cepheus", "totes", "Cortez's", "napalm", "fruitcake", "coordinated", "Coulomb", "desperation", "behoves", "contractor's", "vacationed", "Wanamaker's", "leotard", "filtrated", "cringes", "Lugosi", "sheath's", "orb", "jawed", "Isidro", "geophysics", "persons", "Asians", "booze's", "eight's", "backslappers", "hankered", "dos", "helpings", "tough", "interlarding", "gouger", "inflect", "Juneau's", "hay's", "sardining", "spays", "Brandi", "depressant", "space", "assess", "reappearance's", "Eli's", "Cote", "Enoch", "chants", "ruffianing", "moralised", "unsuccessfully", "or", "Maryland's", "mildest", "unsafer", "dutiful", "Pribilof", "teas", "vagued", "microbiologists", "hedgerow", "speller's", "conservators", "catharsis", "drawbacks", "whooshed", "unlawful", "revolve", "craftsmanship", "destabilise", "Margarito", "Asgard's", "spawn's", "Annabel's", "canonicals", "buttermilk", "exaltation's", "pothole", "reprints", "approximately", "homage", "Wassermann's", "Atlantic's", "exacerbated", "Huerta", "keypunching", "engagements", "dilate", "ponchos", "Helvetius", "Krakatoa", "basket's", "stepmother", "schlock's", "drippings", "cardiology's", "northwesterly", "cruddier", "poesies", "rustproof", "climb", "miscalled", "Belgians", "Iago", "brownout", "nurseries", "hooliganism's", "concourse's", "advocate", "sunrise's", "hyper", "octopus's", "erecting", "counterattacking", "redesign", "studies", "nitrating", "milestone", "bawls", "Nereid", "inferring", "Ontario's", "annexed", "treasury", "cosmogony's", "scandalised", "shindig's", "detention's", "Lollobrigida's", "eradicating", "magpie", "supertankers", "Adventist's", "dozes", "Artaxerxes", "accumulate", "dankest", "telephony", "flows", "Srivijaya's", "fourteen's", "antonym", "rancid", "briefing's", "theologian", "Jacuzzi", "gracing", "chameleon's", "Brittney's", "Pullmans", "Robitussin's", "jitterier", "mayonnaise's", "fort", "closeouts", "amatory", "Drew's", "cockfight", "pyre", "Laura's", "Bradley's", "obstructionists", "interventions", "tenderness's", "loadstones", "castigation's", "undercut", "volubly", "meditated", "Ypsilanti", "Jannie's", "tams", "drummer's", "inaugurations", "mawing", "Anglophile", "Sherpa", "footholds", "Gonzalo", "removers", "customisation", "procurement's", "allured", "grimaced", "captaining", "liberates", "grandeur's", "Windsor", "screwdrivers", "Flynn's", "extortionists", "carnivorous", "thinned", "panhandlers", "trust's", "bemoaned", "untwisted", "cantors", "rectifies", "speculation", "niacin's", "soppy", "condom", "halberd", "Leadbelly", "vocation's", "tanners", "chanticleer", "secretariats", "Ecuador's", "suppurated", "users", "slag's", "atrocity's", "pillar", "sleeveless", "bulldozers", "turners", "hemline", "astounded", "rosaries", "Mallarmé", "crucifies", "Maidenform", "contribution", "evolve", "chemicals", "uteri", "expostulation", "roamers", "daiquiris", "arraignment", "ribs", "King's", "Persepolis", "arsenic's", "blindfolds", "bloodsucker's", "restocks", "falconry", "Olympia's", "Colosseum's", "vigils", "Louie's", "unwillingly", "sealed", "potatoes", "Argentine", "audit's", "outworn", "boggles", "likely", "alleging", "Tinkerbell", "redistribution's", "Normandy", "Cortes", "porter's", "buntings", "cornucopias", "rosewoods", "shelf's", "airdrops", "summits", "Rosalyn", "redecorating", "twirlers", "monsters", "directed", "semiautomatics", "Foch", "Hobart", "mutilates", "Wilma's", "ornamenting", "Clifford's", "pyromania", "Strasbourg", "bleeders", "additions", "super", "effortlessly", "piecing", "vacations", "gybes", "warranted", "Ting", "her", "histrionic", "marshaled", "spore's", "villainy's", "brat", "confusion", "amphitheatre's", "adjourns", "guzzled", "Visayans", "rogue's", "morsels", "candlestick", "flaks", "Waterbury", "pulp's", "endorser's", "postdoc", "coffining", "swallowing", "Wrangell", "Marcie's", "Marley", "untapped", "fear's", "Kant", "pursuit's", "normally", "jackals", "orals", "Paramaribo's", "Marilyn's", "Diem's", "narrower", "medicinally", "chickweed's", "pretentiousness", "Lardner", "baritone's", "purrs", "Pam's", "pestles", "Philip's", "Titania", "eccentrics", "Albion's", "greed's", "raggediest", "importations", "Truman", "incentives", "typified", "incurred", "bandstands", "Minnie's", "pleasant", "Sandy's", "perplexities", "crease's", "obliques", "backstop", "Nair's", "perusing", "Quixote's", "sicknesses", "vapour's", "butte", "lariats", "disfavours", "McGuffey", "paediatric", "filtered", "whiff's", "gunboats", "devolved", "extravaganza's", "organism", "giggling", "citadel's", "counterbalances", "executrixes", "Cathay", "marshmallow's", "iniquitous", "Katmai", "Siva", "welled", "impertinence's", "plunger", "rice", "forgers", "Larousse", "pollution's", "medium", "residue's", "rumbas", "Odis", "arrogant", "Jasper's", "panged", "doubted", "vistaing", "decibel's", "modulus's", "chickpea's", "mugger's", "potentates", "sequesters", "academy's", "Turk's", "pharmacology's", "defogger", "clomp", "soulless", "elastic", "la's", "shards", "unfortunate", "counterclaim's", "objections", "towel", "converged", "z", "ionisation", "stirrups", "antiquarians", "constructor", "virtuosity's", "Göteborg", "centigramme's", "translators", "dalliance's", "us", "bullfight", "drawer's", "nonconformist", "handcrafts", "Magritte", "tulle", "plant's", "routine", "colour's", "latency's", "repertoire's", "photocopies", "catalyse", "ashrams", "lagging", "flapjack's", "ayatollahs", "decentest", "pitted", "conformity", "jack", "batsman", "electrifies", "Unitarians", "obtain", "medicates", "tumour's", "nutritionally", "haystack", "bustles", "slut", "satirising", "birettas", "starring", "Kubrick's", "flogs", "chequering", "Menkalinan's", "Barbados's", "Bioko", "swinish", "hades", "perjured", "timing's", "cocaine", "ejecting", "rationalises", "dilettante's", "umping", "capsized", "frogmen", "matt", "prostituting", "bola's", "devolution's", "poxing", "Maritza's", "snob's", "scoped", "Costco", "feral", "sirocco", "rebating", "truculence", "junkier", "nabs", "elicit", "allegiance", "care", "arteriosclerosis's", "nonproliferation's", "doxologies", "disconsolate", "bodega", "designers", "Rembrandt", "apostasies", "garrulousness", "Hertzsprung's", "hayseeds", "noncooperation's", "resentment", "cuticles", "sandboxes", "gimmicks", "magnolia", "invalidity's", "pulverised", "Tinkerbell's", "hypoglycemics", "gunboat's", "workbench's", "fleetingly's", "sportsman's", "trots", "decomposes", "discrepancies", "owls", "obscener", "organic", "stoutness", "councillor's", "Philippine's", "Aline", "coarsening", "suffocated", "infighting's", "peculiarity", "roof's", "premier", "sucked", "churl", "remounts", "intends", "wiles", "unfold", "unperturbed", "wainscotings", "restfuller", "ashtray's", "wader's", "decanters", "gild", "tandems", "spooked", "galling", "annuity's", "opacity", "clamour's", "flaccid", "caroming", "savvying", "mammalian's", "toadstool's", "doohickey", "jibs", "conquests", "dishes", "effusively", "distinctions", "curly", "Peckinpah", "whining", "quasar", "sponge", "infrequent", "Novembers", "cowling", "poem's", "muzzles", "Sufi", "authoritarians", "prompts", "Gavin's", "morphology's", "shenanigan", "narrated", "rapprochement", "Heine", "propane's", "addition", "prefect's", "pining", "dwindles", "compulsiveness's", "objectors", "trudging", "segregates", "language", "enthralled", "explosiveness", "toeing", "drainers", "Merrimack's", "smarten", "bigwig's", "embroiders", "Medicaids", "grammar's", "behest's", "chiseled", "equalled", "factual", "Casablanca's", "dams", "disillusioned", "turtleneck", "Baden", "provinces", "bushwhacked", "fey", "Yangtze", "loan's", "decent", "strobe", "challenger's", "hometown", "Neal", "Ernestine's", "magnetises", "minute", "patrol", "Starbucks", "Bernstein", "signal", "interplanetary", "tweak", "archdeacon", "untoward", "transducer", "azaleas", "levied", "worlds", "talks", "Tancred", "hairsplitting's", "edibility's", "confab", "rosetted", "Spanish", "Americanisation", "Charley", "realm's", "incongruities", "chinstraps", "dollhouses", "binocular", "popgun", "physiotherapy's", "knave's", "angelically", "heartbreaking", "clarions", "bespeaks", "pivotal", "Zosma", "ungrammatical", "dilution", "tidily", "Dejesus's", "taller", "pennyweight's", "freshman", "Jamestown", "chiefer", "amen", "attiring", "appurtenance's", "opiates", "mottoes", "towellings", "ashen", "font's", "spoors", "pupil", "groom's", "skimpy", "achieves", "intolerance's", "ardour's", "exorcist", "bottoming", "snag's", "Frenches", "hysteric's", "ladyfinger's", "differences", "seed", "clubfoot's", "glades", "Elton's", "jargon", "Waldo", "grinning", "coherence's", "winos", "turnround", "appended", "Ethelred's", "delete", "steadfastness's", "miss", "thermoplastic", "depraves", "unctuous", "reanimates", "transfusing", "protects", "Babbage's", "foists", "inn", "etched", "sanctimoniously", "idling", "timepiece", "holistic", "waterside", "ulna's", "swindled", "employables", "zebra", "nieces", "pertained", "usages", "vamp's", "Larry's", "cooler's", "holographs", "clewing", "stubborning", "peaked", "underfeeds", "marshmallows", "agreeable", "beards", "Slovenia's", "nitroglycerin", "palls", "impurer", "armours", "stomachaches", "notification's", "Dixieland's", "crozier's", "neurotic", "kudos", "Tania's", "M", "soundtrack's", "territory's", "sped", "house's", "divisibility", "ingress's", "pummelled", "Isabel", "Dewitt", "seemly", "hutched", "calliope", "lengthwise", "flubs", "Moldavia's", "Mercia", "McBride's", "Lenten", "pulverise", "football", "oligarchy", "Max", "scribbler", "acclimatize", "brainwashes", "apprenticed", "benevolences", "two", "Wodehouse", "crew's", "massacre", "proportionals", "Jewishness's", "instep's", "emissary", "folder", "nonentity's", "convinced", "caption", "kangarooed", "dogie", "vagabonding", "auction's", "appraising", "antimony", "part's", "longitude's", "inconsiderateness's", "pawning", "serer", "solos", "histories", "mushy", "parturition", "munched", "oregano", "inanest", "dryness", "kitchenware", "unexpected", "covens", "cheesecakes", "stakeout's", "Pulaski's", "Yoknapatawpha's", "pinhead", "drifted", "guzzler's", "funking", "sou'wester", "oesophagus's", "highbrow", "contralto", "meningitis", "Mazzini", "raggedest", "vaginas", "misfiring", "margaritas", "wedder", "pointed", "slicked", "garlanded", "comeuppances", "vassals", "Sui", "Concord", "bozos", "Garry's", "Maribel's", "epileptic", "Jehoshaphat's", "revolutionary's", "kneecaps", "songbird", "actively", "Meredith", "toddler", "distrusting", "fuchsias", "perusal", "instills", "deathbed", "sunspot's", "spatula's", "Muscovy", "humaniser", "Keats", "regrets", "deflect", "theories", "nonpluses", "populating", "leniency's", "penicillin's", "gaol's", "borough", "moose's", "dogmata", "transcendentally", "supposition's", "nursed", "Gagarin's", "honest", "Chandrasekhar's", "mudslinger's", "parable", "bonged", "Wyeth's", "Ochoa's", "Grenoble", "steamy", "halter's", "rotisserie's", "pagoda's", "wallaby's", "Yank", "pretzel", "rapist's", "estrange", "hectored", "Puebla's", "conniver", "creditor's", "dole's", "Fotomat", "patents", "heckling", "thickener", "etches", "yogi", "hemstitched", "obverses", "Lipizzaner", "divert", "Strong's", "sagest", "Alabama", "He", "Carrie's", "obligation's", "verity's", "outed", "Rhee", "bluffed", "codas", "crèche's", "unpalatable", "dilettanti", "vestment", "purse's", "inflammation's", "bookmarked", "doing's", "whinnying", "impersonators", "Theiler", "scurried", "resistor", "southerners", "Anacreon", "reconstruction's", "footage", "trespassing", "Kafka", "bottling", "stays", "Gretzky", "overburdening", "princesses", "weathercock's", "atolls", "cheerier", "packet", "surrenders", "teacup", "Sabik's", "undecidable", "lollygagged", "pawl's", "anaesthesiology", "sublimely", "contortionists", "motorcades", "Maureen", "lamasery", "yourselves", "Creighton", "poliomyelitis's", "civil", "outmanoeuvre", "lauded", "closeness", "Humboldt's", "pretzels", "ungrudging", "blackguard's", "sickles", "typo", "narcotics", "linesman", "psychotics", "pictured", "deviltry", "Yahtzee", "Lovelace's", "cerebra", "airiness's", "bewitch", "how", "motherland's", "crate's", "Keenan's", "turnstile's", "pedometer's", "carted", "slipping", "fallow", "Canadian", "ladybird's", "thump", "shopper's", "enters", "scowls", "nematode", "focused", "Riley's", "grainiest", "novas", "snuffled", "leftovers", "deify", "Samoan", "pruning", "contenting", "Khachaturian's", "triads", "genealogies", "psalmist", "shaming", "appropriated", "ignominies", "Beadle's", "MHz", "peerages", "facile", "Seoul", "Janna's", "jig's", "mousiness's", "funnier", "delimiter", "watermark", "sheik's", "Reasoner", "ipecac's", "curdles", "wronged", "Segovia's", "solders", "Dunne's", "contractor", "awards", "hostels", "pinkie's", "Herzl", "misplace", "shuttle", "innovative", "vestries", "cosmoses", "trikes", "Casandra's", "hokier", "carouser's", "summerhouses", "renascence", "decomposed", "Balzac's", "outlast", "shod", "squalling", "smugging", "weighing", "omega's", "selects", "fleetingly", "Finland", "petted", "disrespects", "fetter", "confound", "brads", "Bosnia's", "preposition's", "guy's", "different", "tracts", "paediatrics's", "polygon", "eyetooth's", "Aesop", "pentagons", "professions", "homeowner", "looter's", "intimidated", "lustre's", "loneliness", "catnapped", "counties", "pailful", "Christendom's", "Barents", "penis", "Mumford's", "Nigel", "éclairs", "splats", "diabolical", "popularly", "quart", "abjected", "Rasalgethi", "camel's", "inimical", "overweening", "distention's", "Advil", "casement", "seamier", "avaricious", "sierra's", "caparison's", "moldered", "Cortez", "handmaid's", "disappointment", "billowed", "overpopulated", "outsets", "ray", "smoother", "overkill", "somber", "tiller's", "zigzag", "adviser", "absorption's", "sturdily", "hairy", "bloodmobile", "investiture's", "creature", "ripeness's", "Jonathon", "arborvitae's", "skulduggery", "bog", "skeleton's", "Kit's", "Panamas", "Ashlee's", "jazzy", "snit", "divisive", "caribous", "permuting", "frankest", "annotated", "oak's", "meg's", "Gill", "burrito", "dormancy's", "offings", "Nike", "outnumbered", "skater's", "Portugal", "deficit", "Cannon's", "pockmark", "sediment's", "mailbox", "innuendoed", "retire", "wolfhound's", "nicotine's", "brigade's", "mettle's", "softhearted", "hooey's", "abdication", "Orval", "Jaime", "ship", "hyphenations", "sectarians", "Alabaman", "tagging", "ultras", "schizoids", "medicines", "undersized", "Gray", "maternity's", "bandaging", "scooping", "coercion's", "serapes", "celebrate", "Listerine's", "throve", "crypt's", "nearsighted", "metallurgists", "Delicious", "cotton's", "yoked", "cogitates", "underage", "cigarette's", "hallways", "Cointreau", "ma'am", "spacing's", "foresight", "parkway's", "Edwardian", "mediator", "Turner", "Derrida's", "motorist's", "hobo", "equivalences", "sophism", "peeping", "telescoped", "overproduce", "ductility", "Leblanc", "refractory", "passé", "decodes", "womanising", "flax's", "pond's", "infrequency", "talkativeness's", "settlement's", "Prince", "bating", "multimillionaire", "Schultz", "premiss", "quackery", "bathhouse", "Leno's", "Monday's", "Hung's", "undaunted", "bewaring", "tension's", "Chile's", "Rostand's", "platoons", "rodeo's", "Dionne", "Dyson's", "gingivitis's", "fewer", "electromagnetism's", "scrubbier", "ensconced", "wretcheder", "mica's", "expectorant", "snapper's", "chastised", "habitation", "spry", "bathing", "stealth's", "champagnes", "baleful", "fencing's", "threaded", "codicils", "disgraced", "redcaps", "addends", "Olivier", "clasped", "Gwendolyn", "foment", "angularity's", "strenuously", "gorilla", "misbehaved", "surplus's", "newsier", "positioned", "bloodmobiles", "circumstantials", "person's", "varicose", "Calliope", "plethora", "Olmsted", "reconciliation", "Brendan's", "beset", "totters", "sailors", "parliamentarians", "Whitaker", "hilts", "pummelling", "academician's", "ruse", "discreeter", "appetisingly", "perfections", "anus", "overrode", "pedantry's", "possessed", "germs", "unscrews", "expired", "semitrailer's", "Cupid's", "nonsmoker", "Marathon", "secs", "Hopkins", "freeing", "libelled", "furious", "staccatos", "electroencephalogram's", "malingerer's", "impulses", "briars", "Tran", "hilltops", "sulks", "quailed", "fads", "retrenches", "spouted", "outtake", "puncture's", "rats", "kibitzed", "berets", "omnivorous", "flange", "Mons", "glints", "mansards", "thou", "cuing", "suspected", "Kaiser's", "savvier", "skits", "interdict's", "Booker", "Rubinstein", "Tm's", "crossing's", "dewlap", "guarantor's", "edification's", "joyfullest", "crossed", "chowdering", "sillier", "reloading", "commodity's", "bodkins", "conduced", "coughs", "nucleus's", "sixtieth", "proverbially", "comprehensive", "ineluctably", "patrolmen", "resuscitating", "carpetbag's", "Darrin's", "Yeager", "Bataan's", "spoonsful", "proceeds", "wrongdoer", "Karroo", "heart", "poison", "typifying", "endowment's", "aquanauts", "deaconesses", "homosexuality", "Maxine", "haunching", "centred", "Peking's", "toothiest", "growers", "firebombs", "throbs", "Downy", "contribution's", "sago's", "Cole", "Knoxville", "leftmost", "Nell's", "Baffin", "barrings", "contagions", "disencumbers", "countdown", "quintuple", "perihelion", "creationism's", "actioning", "admiralty", "Mt's", "durability's", "sewer's", "replicas", "oxide", "ripened", "Pisces's", "Cinerama's", "catheters", "oppressive", "roosting", "foggiest", "properly", "Kareem", "Ollie", "minuted", "vehicles", "eel", "remunerates", "swashbuckler's", "remunerative", "sanguining", "Belem's", "forlornly", "rudders", "officialdom", "countertenors", "Upton", "whoop", "animations", "arouses", "millionths", "videocassette", "fledgling", "shake", "exterminated", "Cain's", "trendiest", "wariest", "torpedoes", "airmails", "Cameron's", "discord's", "spitefulness's", "thudded", "menaced", "takeovers", "solicited", "wallpapers", "economic", "cache", "rechargeable", "gongs", "droning", "exemption", "Alaskans", "toothed", "snifter", "Stephens", "prejudge", "doctor's", "bobolinks", "rotates", "valuation's", "narrator", "weaning", "uncle", "shelter", "destitution's", "Edgardo's", "gauge", "Nice", "Adolf's", "rheumatics", "inheritances", "undesirables", "Eileen's", "flyweight's", "scope", "possessiveness", "tipsily", "effulgence", "rematch", "Baltic", "unsteadiest", "rodeos", "gloaming's", "ringers", "randomised", "commissars", "destroyer's", "router", "disengaging", "it's", "Albert", "rampantly", "varmint", "Adkins", "chevron", "insomniac", "bobsledded", "masochist's", "chronometers", "compaction", "Mauro", "sidled", "Highlander's", "snail's", "syllabifies", "application's", "symmetrical", "blacking", "accent's", "sentimentalists", "sonatas", "profanities", "sloping", "Araby", "percolate", "repeated", "youthfulness's", "Loyola", "deliriously", "matriarch's", "tailors", "rerouting", "hairpin", "dispersal", "endowment", "disquieting", "swat", "neckerchieves", "wrinkles", "amoebas", "Darcy", "orthodontics's", "milder", "sneezing", "prescience's", "pads", "wrought", "perspicuity's", "materialist", "pull", "laundryman's", "lazily", "protractor's", "Vic", "photocopier", "guardrooms", "cablecasting", "confirms", "excretions", "combatant", "counterfeiters", "periwig", "genteelest", "router's", "springy", "procreated", "syphon", "parent's", "bigwigs", "rebelled", "milkmaids", "McGee's", "seaworthier", "Bellatrix's", "tenement", "embryologists", "Vaselining", "burrow's", "tonnage's", "Petty's", "chancels", "scouring", "mouser", "recompensed", "guarding", "editor", "raster", "bourgeoisie's", "interpolating", "skinflint's", "transport", "bullfinch", "needlessly", "withholds", "counterclockwise", "panicking", "Ahriman", "flambeing", "contrary", "heartstrings", "whittled", "crib's", "highlighter", "extroverted", "Martinique's", "racquets", "Maldivian", "physiognomy", "Hammarskjold", "massage", "shingling", "neighbourhood", "boobed", "vulture", "intercontinental", "cobblers", "peddlers", "forthrightly", "germicide", "raindrop's", "fir's", "decaffeinates", "wobblier", "abnegated", "cruiser's", "satiety", "trilled", "impending", "gulf", "mountebank", "beltway", "reappointment", "cinematographer", "pylon", "penthouses", "morally", "installs", "Walsh's", "drawstring", "circus's", "Khayyam's", "Myrtle's", "ventrals", "category's", "opportunistic", "grovelling", "warier", "upchuck", "hairdresser's", "Montanans", "jobber", "dazzle", "encirclement's", "muffin's", "coronets", "focus's", "footfall's", "subjunctives", "late", "pedagogued", "dignitaries", "content", "blockbusters", "reminiscent", "mayor", "specifier", "extinction", "nutshell's", "catbird's", "bundle", "gracefulness", "exceed", "estranges", "chancy", "bankrupted", "Avery", "Barnett", "succulence", "stacking", "ensnare", "truck", "embargo", "persecutes", "translation's", "muskrat's", "illumines", "undercoat's", "fleecier", "brick", "qualities", "imprecision", "reprisals", "discounts", "harmonics", "Mann's", "terrorism", "interminable", "Santiago's", "deepness", "tramples", "golder", "voyeurism's", "tent", "particle's", "minuend", "waxwings", "knobby", "trustee", "funnily", "hotheadedness's", "Kristin", "what", "bite", "murmur's", "pustule's", "weeknights", "rocked", "athlete", "ventilates", "impresses", "daguerreotyping", "Gross", "gambols", "villa", "maraud", "disapproval", "apostrophe's", "sheaf", "noisemaker's", "autonomy's", "massing", "daemon's", "Thackeray", "fermenting", "whammy", "philosophise", "empathy", "calamities", "sunbathe", "Qom", "yahoo's", "coxcomb's", "move", "school's", "rainmakers", "shipwreck", "potbelly's", "courageously", "current", "Aleut", "treaties", "U", "always", "Bosch", "impregnating", "bud's", "carat", "centrists", "acquaintance's", "convoy's", "chichis", "restraint's", "Cosby", "factotums", "handshaking", "paragon's", "mileages", "Tammie", "cartoonists", "lemmas", "lowliness's", "onion's", "E's", "Bible", "Cranmer", "fob's", "minks", "overstocking", "Willamette", "needle's", "scuppers", "Carborundum", "upwardly", "tallies", "aptitude", "synod", "nasturtium's", "Pensacola", "snappish", "merino", "sups", "fingerboard's", "prodigy's", "narcissism's", "substantial", "lug", "establishing", "Vergil's", "patrimonies", "shorted", "forestation", "undeniable", "Katmandu", "lamination", "trollop's", "odd", "stanza", "paraplegic", "melanin", "Rico", "foreman", "stereotypes", "affinity's", "cleansing", "sautéing", "epochs", "crooners", "manicured", "undisclosed", "propel", "usage", "Alioth's", "Aurelia's", "peruse", "Vassar's", "Demosthenes's", "Brazos", "supermarket", "scribbles", "Jekyll's", "discomfort's", "mastiffs", "ballasting", "Figueroa", "turnstiles", "convince", "Shelton's", "Gustavo", "shunting", "Fujitsu's", "fining's", "hippos", "dam's", "expressionists", "peewee", "troop's" ] WORDS_SIZE = WORDS.size def random_word return WORDS[rand(WORDS_SIZE)] end def random_sentence(max_len) sentence = "" (1 + rand(max_len)).times { sentence << " " << random_word } return sentence end def random_doc(max_fields = 10, max_elements = 10, max_len = 100) doc = {} (1 + rand(max_fields)).times do field = random_word.intern elem_count = rand(max_elements) + 1 if (elem_count == 1) doc[field] = random_sentence(max_len) else doc[field] = [] elem_count.times { doc[field] << random_sentence(max_len)} end end return doc end end ferret-0.11.8.6/test/unit/index/tc_index_reader.rb0000644000004100000410000004464612476264460022026 0ustar www-datawww-datarequire File.dirname(__FILE__) + "/../../test_helper" module IndexReaderCommon include Ferret::Index include Ferret::Analysis def test_index_reader do_test_get_field_names() do_test_term_enum() do_test_term_doc_enum() do_test_term_vectors() do_test_get_doc() end def do_test_get_field_names() field_names = @ir.field_names assert(field_names.include?(:body)) assert(field_names.include?(:changing_field)) assert(field_names.include?(:author)) assert(field_names.include?(:title)) assert(field_names.include?(:text)) assert(field_names.include?(:year)) end def do_test_term_enum() te = @ir.terms(:author) assert_equal('[{"term":"Leo","frequency":1},{"term":"Tolstoy","frequency":1}]', te.to_json); te.field = :author assert_equal('[["Leo",1],["Tolstoy",1]]', te.to_json(:fast)); te.field = :author assert(te.next?) assert_equal("Leo", te.term) assert_equal(1, te.doc_freq) assert(te.next?) assert_equal("Tolstoy", te.term) assert_equal(1, te.doc_freq) assert(! te.next?) te.field = :body assert(te.next?) assert_equal("And", te.term) assert_equal(1, te.doc_freq) assert(te.skip_to("Not")) assert_equal("Not", te.term) assert_equal(1, te.doc_freq) assert(te.next?) assert_equal("Random", te.term) assert_equal(16, te.doc_freq) te.field = :text assert(te.skip_to("which")) assert("which", te.term) assert_equal(1, te.doc_freq) assert(! te.next?) te.field = :title assert(te.next?) assert_equal("War And Peace", te.term) assert_equal(1, te.doc_freq) assert(!te.next?) expected = %w{is 1 more 1 not 1 skip 42 stored 1 text 1 which 1} te = @ir.terms(:text) te.each do |term, doc_freq| assert_equal(expected.shift, term) assert_equal(expected.shift.to_i, doc_freq) end te = @ir.terms_from(:body, "Not") assert_equal("Not", te.term) assert_equal(1, te.doc_freq) assert(te.next?) assert_equal("Random", te.term) assert_equal(16, te.doc_freq) end def do_test_term_doc_enum() assert_equal(IndexTestHelper::INDEX_TEST_DOCS.size, @ir.num_docs()) assert_equal(IndexTestHelper::INDEX_TEST_DOCS.size, @ir.max_doc()) assert_equal(4, @ir.doc_freq(:body, "Wally")) tde = @ir.term_docs_for(:body, "Wally") [ [ 0, 1], [ 5, 1], [18, 3], [20, 6] ].each do |doc, freq| assert(tde.next?) assert_equal(doc, tde.doc()) assert_equal(freq, tde.freq()) end assert(! tde.next?) tde = @ir.term_docs_for(:body, "Wally") assert_equal('[{"document":0,"frequency":1},{"document":5,"frequency":1},{"document":18,"frequency":3},{"document":20,"frequency":6}]', tde.to_json) tde = @ir.term_docs_for(:body, "Wally") assert_equal('[[0,1],[5,1],[18,3],[20,6]]', tde.to_json(:fast)) do_test_term_docpos_enum_skip_to(tde) # test term positions tde = @ir.term_positions_for(:body, "read") [ [false, 1, 1, [3]], [false, 2, 2, [1, 4]], [false, 6, 4, [3, 4]], [false, 9, 3, [0, 4]], [ true, 16, 2, [2]], [ true, 21, 6, [3, 4, 5, 8, 9, 10]] ].each do |skip, doc, freq, positions| if skip assert(tde.skip_to(doc)) else assert(tde.next?) end assert_equal(doc, tde.doc()) assert_equal(freq, tde.freq()) positions.each {|pos| assert_equal(pos, tde.next_position())} end assert_nil(tde.next_position()) assert(! tde.next?) tde = @ir.term_positions_for(:body, "read") assert_equal('[' + '{"document":1,"frequency":1,"positions":[3]},' + '{"document":2,"frequency":2,"positions":[1,4]},' + '{"document":6,"frequency":4,"positions":[3,4,5,6]},' + '{"document":9,"frequency":3,"positions":[0,4,13]},' + '{"document":10,"frequency":1,"positions":[1]},' + '{"document":16,"frequency":2,"positions":[2,3]},' + '{"document":17,"frequency":1,"positions":[2]},' + '{"document":20,"frequency":1,"positions":[21]},' + '{"document":21,"frequency":6,"positions":[3,4,5,8,9,10]}]', tde.to_json()) tde = @ir.term_positions_for(:body, "read") assert_equal('[' + '[1,1,[3]],' + '[2,2,[1,4]],' + '[6,4,[3,4,5,6]],' + '[9,3,[0,4,13]],' + '[10,1,[1]],' + '[16,2,[2,3]],' + '[17,1,[2]],' + '[20,1,[21]],' + '[21,6,[3,4,5,8,9,10]]]', tde.to_json(:fast)) tde = @ir.term_positions_for(:body, "read") do_test_term_docpos_enum_skip_to(tde) end def do_test_term_docpos_enum_skip_to(tde) tde.seek(:text, "skip") [ [10, 22], [44, 44], [60, 60], [62, 62], [63, 63], ].each do |skip_doc, doc_and_freq| assert(tde.skip_to(skip_doc)) assert_equal(doc_and_freq, tde.doc()) assert_equal(doc_and_freq, tde.freq()) end assert(! tde.skip_to(IndexTestHelper::INDEX_TEST_DOC_COUNT)) assert(! tde.skip_to(IndexTestHelper::INDEX_TEST_DOC_COUNT)) assert(! tde.skip_to(IndexTestHelper::INDEX_TEST_DOC_COUNT + 100)) tde.seek(:text, "skip") assert(! tde.skip_to(IndexTestHelper::INDEX_TEST_DOC_COUNT)) end def do_test_term_vectors() expected_tv = TermVector.new(:body, [ TVTerm.new("word1", 3, [2, 4, 7]), TVTerm.new("word2", 1, [3]), TVTerm.new("word3", 4, [0, 5, 8, 9]), TVTerm.new("word4", 2, [1, 6]) ], [*(0...10)].collect {|i| TVOffsets.new(i*6, (i+1)*6 - 1)}) tv = @ir.term_vector(3, :body) assert_equal(expected_tv, tv) tvs = @ir.term_vectors(3) assert_equal(3, tvs.size) assert_equal(expected_tv, tvs[:body]) tv = tvs[:author] assert_equal(:author, tv.field) assert_equal([TVTerm.new("Leo", 1, [0]), TVTerm.new("Tolstoy", 1, [1])], tv.terms) assert(tv.offsets.nil?) tv = tvs[:title] assert_equal(:title, tv.field) assert_equal([TVTerm.new("War And Peace", 1, nil)], tv.terms) assert_equal([TVOffsets.new(0, 13)], tv.offsets) end def do_test_get_doc() doc = @ir.get_document(3) [:author, :body, :title, :year].each {|fn| assert(doc.fields.include?(fn))} assert_equal(4, doc.fields.size) assert_equal(0, doc.size) assert_equal([], doc.keys) assert_equal("Leo Tolstoy", doc[:author]) assert_equal("word3 word4 word1 word2 word1 word3 word4 word1 word3 word3", doc[:body]) assert_equal("War And Peace", doc[:title]) assert_equal("1865", doc[:year]) assert_nil(doc[:text]) assert_equal(4, doc.size) [:author, :body, :title, :year].each {|fn| assert(doc.keys.include?(fn))} assert_equal([@ir[0].load, @ir[1].load, @ir[2].load], @ir[0, 3].collect {|d| d.load}) assert_equal([@ir[61].load, @ir[62].load, @ir[63].load], @ir[61, 100].collect {|d| d.load}) assert_equal([@ir[0].load, @ir[1].load, @ir[2].load], @ir[0..2].collect {|d| d.load}) assert_equal([@ir[61].load, @ir[62].load, @ir[63].load], @ir[61..100].collect {|d| d.load}) assert_equal(@ir[-60], @ir[4]) end def test_ir_norms() @ir.set_norm(3, :title, 1) @ir.set_norm(3, :body, 12) @ir.set_norm(3, :author, 145) @ir.set_norm(3, :year, 31) @ir.set_norm(3, :text, 202) @ir.set_norm(25, :text, 20) @ir.set_norm(50, :text, 200) @ir.set_norm(63, :text, 155) norms = @ir.norms(:text) assert_equal(202, norms.bytes.to_a[ 3]) assert_equal( 20, norms.bytes.to_a[25]) assert_equal(200, norms.bytes.to_a[50]) assert_equal(155, norms.bytes.to_a[63]) norms = @ir.norms(:title) assert_equal(1, norms.bytes.to_a[3]) norms = @ir.norms(:body) assert_equal(12, norms.bytes.to_a[3]) norms = @ir.norms(:author) assert_equal(145, norms.bytes.to_a[3]) norms = @ir.norms(:year) # TODO: this returns two possible results depending on whether it is # a multi reader or a segment reader. If it is a multi reader it will # always return an empty set of norms, otherwise it will return nil. # I'm not sure what to do here just yet or if this is even an issue. #assert(norms.nil?) norms = " " * 164 @ir.get_norms_into(:text, norms, 100) assert_equal(202, norms.bytes.to_a[103]) assert_equal( 20, norms.bytes.to_a[125]) assert_equal(200, norms.bytes.to_a[150]) assert_equal(155, norms.bytes.to_a[163]) @ir.commit() iw_optimize() ir2 = ir_new() norms = " " * 164 ir2.get_norms_into(:text, norms, 100) assert_equal(202, norms.bytes.to_a[103]) assert_equal( 20, norms.bytes.to_a[125]) assert_equal(200, norms.bytes.to_a[150]) assert_equal(155, norms.bytes.to_a[163]) ir2.close() end def test_ir_delete() doc_count = IndexTestHelper::INDEX_TEST_DOCS.size @ir.delete(1000) # non existant doc_num assert(! @ir.has_deletions?()) assert_equal(doc_count, @ir.max_doc()) assert_equal(doc_count, @ir.num_docs()) assert(! @ir.deleted?(10)) [ [10, doc_count - 1], [10, doc_count - 1], [doc_count - 1, doc_count - 2], [doc_count - 2, doc_count - 3], ].each do |del_num, num_docs| @ir.delete(del_num) assert(@ir.has_deletions?()) assert_equal(doc_count, @ir.max_doc()) assert_equal(num_docs, @ir.num_docs()) assert(@ir.deleted?(del_num)) end @ir.undelete_all() assert(! @ir.has_deletions?()) assert_equal(doc_count, @ir.max_doc()) assert_equal(doc_count, @ir.num_docs()) assert(! @ir.deleted?(10)) assert(! @ir.deleted?(doc_count - 2)) assert(! @ir.deleted?(doc_count - 1)) del_list = [10, 20, 30, 40, 50, doc_count - 1] del_list.each {|doc_num| @ir.delete(doc_num)} assert(@ir.has_deletions?()) assert_equal(doc_count, @ir.max_doc()) assert_equal(doc_count - del_list.size, @ir.num_docs()) del_list.each {|doc_num| assert(@ir.deleted?(doc_num))} ir2 = ir_new() assert(! ir2.has_deletions?()) assert_equal(doc_count, ir2.max_doc()) assert_equal(doc_count, ir2.num_docs()) @ir.commit() assert(! ir2.has_deletions?()) assert_equal(doc_count, ir2.max_doc()) assert_equal(doc_count, ir2.num_docs()) ir2.close ir2 = ir_new() assert(ir2.has_deletions?()) assert_equal(doc_count, ir2.max_doc()) assert_equal(doc_count - 6, ir2.num_docs()) del_list.each {|doc_num| assert(ir2.deleted?(doc_num))} ir2.undelete_all() assert(! ir2.has_deletions?()) assert_equal(doc_count, ir2.max_doc()) assert_equal(doc_count, ir2.num_docs()) del_list.each {|doc_num| assert(! ir2.deleted?(doc_num))} del_list.each {|doc_num| assert(@ir.deleted?(doc_num))} ir2.commit() del_list.each {|doc_num| assert(@ir.deleted?(doc_num))} del_list.each {|doc_num| ir2.delete(doc_num)} ir2.commit() iw_optimize() ir3 = ir_new() assert(!ir3.has_deletions?()) assert_equal(doc_count - 6, ir3.max_doc()) assert_equal(doc_count - 6, ir3.num_docs()) ir2.close() ir3.close() end def test_latest assert(@ir.latest?) ir2 = ir_new() assert(ir2.latest?) ir2.delete(0) ir2.commit() assert(ir2.latest?) assert(!@ir.latest?) ir2.close() end end class MultiReaderTest < Test::Unit::TestCase include IndexReaderCommon def ir_new IndexReader.new(@dir) end def iw_optimize iw = IndexWriter.new(:dir => @dir, :analyzer => WhiteSpaceAnalyzer.new()) iw.optimize() iw.close() end def setup @dir = Ferret::Store::RAMDirectory.new() iw = IndexWriter.new(:dir => @dir, :analyzer => WhiteSpaceAnalyzer.new(), :create => true, :field_infos => IndexTestHelper::INDEX_TEST_FIS, :max_buffered_docs => 15) IndexTestHelper::INDEX_TEST_DOCS.each {|doc| iw << doc} # we mustn't optimize here so that MultiReader is used. #iw.optimize() unless self.class == MultiReaderTest iw.close() @ir = ir_new() end def teardown() @ir.close() @dir.close() end end class SegmentReaderTest < MultiReaderTest end class MultiExternalReaderTest < Test::Unit::TestCase include IndexReaderCommon def ir_new readers = @dirs.collect {|dir| IndexReader.new(dir) } IndexReader.new(readers) end def iw_optimize @dirs.each do |dir| iw = IndexWriter.new(:dir => dir, :analyzer => WhiteSpaceAnalyzer.new()) iw.optimize() iw.close() end end def setup() @dirs = [] [ [0, 10], [10, 30], [30, IndexTestHelper::INDEX_TEST_DOCS.size] ].each do |start, finish| dir = Ferret::Store::RAMDirectory.new() @dirs << dir iw = IndexWriter.new(:dir => dir, :analyzer => WhiteSpaceAnalyzer.new(), :create => true, :field_infos => IndexTestHelper::INDEX_TEST_FIS) (start...finish).each do |doc_id| iw << IndexTestHelper::INDEX_TEST_DOCS[doc_id] end iw.close() end @ir = ir_new end def teardown() @ir.close() @dirs.each {|dir| dir.close} end end class MultiExternalReaderDirTest < Test::Unit::TestCase include IndexReaderCommon def ir_new IndexReader.new(@dirs) end def iw_optimize @dirs.each do |dir| iw = IndexWriter.new(:dir => dir, :analyzer => WhiteSpaceAnalyzer.new()) iw.optimize() iw.close() end end def setup() @dirs = [] [ [0, 10], [10, 30], [30, IndexTestHelper::INDEX_TEST_DOCS.size] ].each do |start, finish| dir = Ferret::Store::RAMDirectory.new() @dirs << dir iw = IndexWriter.new(:dir => dir, :analyzer => WhiteSpaceAnalyzer.new(), :create => true, :field_infos => IndexTestHelper::INDEX_TEST_FIS) (start...finish).each do |doc_id| iw << IndexTestHelper::INDEX_TEST_DOCS[doc_id] end iw.close() end @ir = ir_new end def teardown() @ir.close() @dirs.each {|dir| dir.close} end end class MultiExternalReaderPathTest < Test::Unit::TestCase include IndexReaderCommon def ir_new IndexReader.new(@paths) end def iw_optimize @paths.each do |path| iw = IndexWriter.new(:path => path, :analyzer => WhiteSpaceAnalyzer.new()) iw.optimize() iw.close() end end def setup() base_dir = File.expand_path(File.join(File.dirname(__FILE__), '../../temp/multidir')) FileUtils.mkdir_p(base_dir) @paths = [ File.join(base_dir, "i1"), File.join(base_dir, "i2"), File.join(base_dir, "i3") ] [ [0, 10], [10, 30], [30, IndexTestHelper::INDEX_TEST_DOCS.size] ].each_with_index do |(start, finish), i| path = @paths[i] iw = IndexWriter.new(:path => path, :analyzer => WhiteSpaceAnalyzer.new(), :create => true, :field_infos => IndexTestHelper::INDEX_TEST_FIS) (start...finish).each do |doc_id| iw << IndexTestHelper::INDEX_TEST_DOCS[doc_id] end iw.close() end @ir = ir_new end def teardown() @ir.close() end end class IndexReaderTest < Test::Unit::TestCase include Ferret::Index include Ferret::Analysis def setup() @dir = Ferret::Store::RAMDirectory.new() end def teardown() @dir.close() end def test_ir_multivalue_fields() @fs_dpath = File.expand_path(File.join(File.dirname(__FILE__), '../../temp/fsdir')) @fs_dir = Ferret::Store::FSDirectory.new(@fs_dpath, true) iw = IndexWriter.new(:dir => @fs_dir, :analyzer => WhiteSpaceAnalyzer.new(), :create => true) doc = { :tag => ["Ruby", "C", "Lucene", "Ferret"], :body => "this is the body Document Field", :title => "this is the title DocField", :author => "this is the author field" } iw << doc iw.close() @dir = Ferret::Store::RAMDirectory.new(@fs_dir) ir = IndexReader.new(@dir) assert_equal(doc, ir.get_document(0).load) ir.close end def do_test_term_vectors(ir) expected_tv = TermVector.new(:body, [ TVTerm.new("word1", 3, [2, 4, 7]), TVTerm.new("word2", 1, [3]), TVTerm.new("word3", 4, [0, 5, 8, 9]), TVTerm.new("word4", 2, [1, 6]) ], [*(0...10)].collect {|i| TVOffsets.new(i*6, (i+1)*6 - 1)}) tv = ir.term_vector(3, :body) assert_equal(expected_tv, tv) tvs = ir.term_vectors(3) assert_equal(3, tvs.size) assert_equal(expected_tv, tvs[:body]) tv = tvs[:author] assert_equal(:author, tv.field) assert_equal([TVTerm.new("Leo", 1, [0]), TVTerm.new("Tolstoy", 1, [1])], tv.terms) assert(tv.offsets.nil?) tv = tvs[:title] assert_equal(:title, tv.field) assert_equal([TVTerm.new("War And Peace", 1, nil)], tv.terms) assert_equal([TVOffsets.new(0, 13)], tv.offsets) end def do_test_ir_read_while_optimizing(dir) iw = IndexWriter.new(:dir => dir, :analyzer => WhiteSpaceAnalyzer.new(), :create => true, :field_infos => IndexTestHelper::INDEX_TEST_FIS) IndexTestHelper::INDEX_TEST_DOCS.each {|doc| iw << doc} iw.close() ir = IndexReader.new(dir) do_test_term_vectors(ir) iw = IndexWriter.new(:dir => dir, :analyzer => WhiteSpaceAnalyzer.new()) iw.optimize() iw.close() do_test_term_vectors(ir) ir.close() end def test_ir_read_while_optimizing() do_test_ir_read_while_optimizing(@dir) end def test_ir_read_while_optimizing_on_disk() dpath = File.expand_path(File.join(File.dirname(__FILE__), '../../temp/fsdir')) fs_dir = Ferret::Store::FSDirectory.new(dpath, true) do_test_ir_read_while_optimizing(fs_dir) fs_dir.close() end def test_latest() dpath = File.expand_path(File.join(File.dirname(__FILE__), '../../temp/fsdir')) fs_dir = Ferret::Store::FSDirectory.new(dpath, true) iw = IndexWriter.new(:dir => fs_dir, :analyzer => WhiteSpaceAnalyzer.new(), :create => true) iw << {:field => "content"} iw.close() ir = IndexReader.new(fs_dir) assert(ir.latest?) iw = IndexWriter.new(:dir => fs_dir, :analyzer => WhiteSpaceAnalyzer.new()) iw << {:field => "content2"} iw.close() assert(!ir.latest?) ir.close() ir = IndexReader.new(fs_dir) assert(ir.latest?) ir.close() end end ferret-0.11.8.6/test/unit/search/0000755000004100000410000000000012476264460016502 5ustar www-datawww-dataferret-0.11.8.6/test/unit/search/tc_search_and_sort.rb0000644000004100000410000001706012476264460022657 0ustar www-datawww-datarequire File.dirname(__FILE__) + "/../../test_helper" class SearchAndSortTest < Test::Unit::TestCase include Ferret::Search include Ferret::Store include Ferret::Analysis include Ferret::Index def setup() @dir = RAMDirectory.new() iw = IndexWriter.new(:dir => @dir, :analyzer => WhiteSpaceAnalyzer.new(), :create => true, :min_merge_docs => 3) [ # len mod {:x => "findall", :string => "a", :int => "6", :float => "0.01"}, # 4 0 {:x => "findall", :string => "c", :int => "5", :float => "0.1"}, # 3 3 {:x => "findall", :string => "e", :int => "2", :float => "0.001"}, # 5 1 {:x => "findall", :string => "g", :int => "1", :float => "1.0"}, # 3 3 {:x => "findall", :string => nil, :int => "3", :float => "0.0001"}, # 6 2 {:x => "findall", :string => "", :int => "4", :float => "10.0"}, # 4 0 {:x => "findall", :string => "h", :int => "5", :float => "0.00001"}, # 7 3 {:x => "findall", :string => "f", :int => "2", :float => "100.0"}, # 5 1 {:x => "findall", :string => "d", :int => "3", :float => "1000.0"}, # 6 2 {:x => "findall", :string => "b", :int => "4", :float => "0.000001"} # 8 0 ].each do |doc| doc.extend(Ferret::BoostMixin) doc.boost = doc[:float].to_f iw << doc end iw.close end def teardown() @dir.close() end def do_test_top_docs(is, query, expected, sort = nil) top_docs = is.search(query, {:sort => sort}) top_docs.total_hits.times do |i| assert_equal(expected[i], top_docs.hits[i].doc) end # test sorting works for smaller ranged query offset = 3 limit = 3 top_docs = is.search(query, {:sort => sort, :offset => offset, :limit => limit}) limit.times do |i| assert_equal(expected[offset + i], top_docs.hits[i].doc) end end def test_sort_field_to_s() assert_equal("", SortField::SCORE.to_s); sf = SortField.new("MyScore", {:type => :score, :reverse => true}) assert_equal("MyScore:!", sf.to_s) assert_equal("", SortField::DOC_ID.to_s); sf = SortField.new("MyDoc", {:type => :doc_id, :reverse => true}) assert_equal("MyDoc:!", sf.to_s) sf = SortField.new(:date, {:type => :integer}) assert_equal("date:", sf.to_s) sf = SortField.new(:date, {:type => :integer, :reverse => true}) assert_equal("date:!", sf.to_s) sf = SortField.new(:price, {:type => :float}) assert_equal("price:", sf.to_s) sf = SortField.new(:price, {:type => :float, :reverse => true}) assert_equal("price:!", sf.to_s) sf = SortField.new(:content, {:type => :string}) assert_equal("content:", sf.to_s) sf = SortField.new(:content, {:type => :string, :reverse => true}) assert_equal("content:!", sf.to_s) sf = SortField.new(:auto_field, {:type => :auto}) assert_equal("auto_field:", sf.to_s) sf = SortField.new(:auto_field, {:type => :auto, :reverse => true}) assert_equal("auto_field:!", sf.to_s) end def test_sort_to_s() sort = Sort.new assert_equal("Sort[, ]", sort.to_s) sf = SortField.new(:auto_field, {:type => :auto, :reverse => true}) sort = Sort.new([sf, SortField::SCORE, SortField::DOC_ID]) assert_equal("Sort[auto_field:!, , ]", sort.to_s) sort = Sort.new([:one, :two, SortField::DOC_ID]) assert_equal("Sort[one:, two:, ]", sort.to_s) sort = Sort.new([:one, :two]) assert_equal("Sort[one:, two:, ]", sort.to_s) end def test_sorts() is = Searcher.new(@dir) q = TermQuery.new(:x, "findall") do_test_top_docs(is, q, [8,7,5,3,1,0,2,4,6,9]) do_test_top_docs(is, q, [8,7,5,3,1,0,2,4,6,9], Sort::RELEVANCE) do_test_top_docs(is, q, [8,7,5,3,1,0,2,4,6,9], [SortField::SCORE]) do_test_top_docs(is, q, [0,1,2,3,4,5,6,7,8,9], Sort::INDEX_ORDER) do_test_top_docs(is, q, [0,1,2,3,4,5,6,7,8,9], [SortField::DOC_ID]) ## int sf_int = SortField.new(:int, {:type => :integer, :reverse => true}) do_test_top_docs(is, q, [0,1,6,5,9,4,8,2,7,3], [sf_int]) do_test_top_docs(is, q, [0,1,6,5,9,4,8,2,7,3], "int DESC") do_test_top_docs(is, q, [0,1,6,5,9,8,4,7,2,3], [sf_int, SortField::SCORE]) do_test_top_docs(is, q, [0,1,6,5,9,8,4,7,2,3], "int DESC, SCORE") sf_int = SortField.new(:int, {:type => :integer}) do_test_top_docs(is, q, [3,2,7,4,8,5,9,1,6,0], [sf_int]) do_test_top_docs(is, q, [3,2,7,4,8,5,9,1,6,0], "int") ## byte do_test_top_docs(is, q, [3,2,7,4,8,5,9,1,6,0], SortField.new(:int, :type => :byte)) do_test_top_docs(is, q, [0,1,6,5,9,4,8,2,7,3], [SortField.new(:int, :type => :byte, :reverse => true)]) ## float sf_float = SortField.new(:float, {:type => :float, :reverse => true}) do_test_top_docs(is, q, [8,7,5,3,1,0,2,4,6,9], Sort.new([sf_float, SortField::SCORE])) do_test_top_docs(is, q, [8,7,5,3,1,0,2,4,6,9], "float DESC, SCORE") sf_float = SortField.new(:float, {:type => :float}) do_test_top_docs(is, q, [9,6,4,2,0,1,3,5,7,8], Sort.new([sf_float, SortField::SCORE])) do_test_top_docs(is, q, [9,6,4,2,0,1,3,5,7,8], "float, SCORE") ## str sf_str = SortField.new(:string, {:type => :string}) do_test_top_docs(is, q, [0,9,1,8,2,7,3,6,5,4], [sf_str, SortField::SCORE]) do_test_top_docs(is, q, [0,9,1,8,2,7,3,6,4,5], "string") ## auto do_test_top_docs(is, q, [0,9,1,8,2,7,3,6,4,5], Sort.new(:string)) do_test_top_docs(is, q, [3,2,7,4,8,5,9,1,6,0], Sort.new([:int])) do_test_top_docs(is, q, [9,6,4,2,0,1,3,5,7,8], Sort.new(:float)) do_test_top_docs(is, q, [9,6,4,2,0,1,3,5,7,8], :float) do_test_top_docs(is, q, [8,7,5,3,1,0,2,4,6,9], Sort.new(:float, true)) do_test_top_docs(is, q, [0,6,1,5,9,4,8,7,2,3], Sort.new([:int, :string], true)) do_test_top_docs(is, q, [0,6,1,5,9,4,8,7,2,3], "int DESC, string DESC") do_test_top_docs(is, q, [3,2,7,8,4,9,5,1,6,0], Sort.new([:int, :string])) do_test_top_docs(is, q, [3,2,7,8,4,9,5,1,6,0], [:int, :string]) do_test_top_docs(is, q, [3,2,7,8,4,9,5,1,6,0], "int, string") end #LENGTH = SortField::SortType.new("length", lambda{|str| str.length}) #LENGTH_MODULO = SortField::SortType.new("length_mod", lambda{|str| str.length}, # lambda{|i, j| (i%4) <=> (j%4)}) #def test_special_sorts # is = IndexSearcher.new(@dir) # q = TermQuery.new(Term.new(:x, "findall")) # sf = SortField.new(:float, {:type => LENGTH, :reverse => true}) # do_test_top_docs(is, q, [9,6,4,8,2,7,0,5,1,3], [sf]) # sf = SortField.new(:float, {:type => LENGTH_MODULO, :reverse => true}) # do_test_top_docs(is, q, [1,3,6,4,8,2,7,0,5,9], [sf]) # sf = SortField.new(:float, {:type => LENGTH, # :reverse => true, # :comparator => lambda{|i,j| (j%4) <=> (i%4)}}) # do_test_top_docs(is, q, [0,5,9,2,7,4,8,1,3,6], [sf]) #end end ferret-0.11.8.6/test/unit/search/tc_filter.rb0000644000004100000410000001207612476264460021010 0ustar www-datawww-datarequire File.dirname(__FILE__) + "/../../test_helper" require 'date' class FilterTest < Test::Unit::TestCase include Ferret::Search include Ferret::Analysis include Ferret::Index def setup() @dir = Ferret::Store::RAMDirectory.new() iw = IndexWriter.new(:dir => @dir, :analyzer => WhiteSpaceAnalyzer.new(), :create => true) [ {:int => "0", :date => "20040601", :switch => "on"}, {:int => "1", :date => "20041001", :switch => "off"}, {:int => "2", :date => "20051101", :switch => "on"}, {:int => "3", :date => "20041201", :switch => "off"}, {:int => "4", :date => "20051101", :switch => "on"}, {:int => "5", :date => "20041201", :switch => "off"}, {:int => "6", :date => "20050101", :switch => "on"}, {:int => "7", :date => "20040701", :switch => "off"}, {:int => "8", :date => "20050301", :switch => "on"}, {:int => "9", :date => "20050401", :switch => "off"} ].each {|doc| iw << doc} iw.close end def teardown() @dir.close() end def do_test_top_docs(searcher, query, expected, filter) top_docs = searcher.search(query, {:filter => filter}) #puts top_docs assert_equal(expected.size, top_docs.hits.size) top_docs.total_hits.times do |i| assert_equal(expected[i], top_docs.hits[i].doc) end end def test_range_filter searcher = Searcher.new(@dir) q = MatchAllQuery.new() rf = RangeFilter.new(:int, :>= => "2", :<= => "6") do_test_top_docs(searcher, q, [2,3,4,5,6], rf) rf = RangeFilter.new(:int, :>= => "2", :< => "6") do_test_top_docs(searcher, q, [2,3,4,5], rf) rf = RangeFilter.new(:int, :> => "2", :<= => "6") do_test_top_docs(searcher, q, [3,4,5,6], rf) rf = RangeFilter.new(:int, :> => "2", :< => "6") do_test_top_docs(searcher, q, [3,4,5], rf) rf = RangeFilter.new(:int, :>= => "6") do_test_top_docs(searcher, q, [6,7,8,9], rf) rf = RangeFilter.new(:int, :> => "6") do_test_top_docs(searcher, q, [7,8,9], rf) rf = RangeFilter.new(:int, :<= => "2") do_test_top_docs(searcher, q, [0,1,2], rf) rf = RangeFilter.new(:int, :< => "2") do_test_top_docs(searcher, q, [0,1], rf) bits = rf.bits(searcher.reader) assert(bits[0]) assert(bits[1]) assert(!bits[2]) assert(!bits[3]) assert(!bits[4]) end def test_range_filter_errors assert_raise(ArgumentError) {f = RangeFilter.new(:f, :> => "b", :< => "a")} assert_raise(ArgumentError) {f = RangeFilter.new(:f, :include_lower => true)} assert_raise(ArgumentError) {f = RangeFilter.new(:f, :include_upper => true)} end def test_query_filter() searcher = Searcher.new(@dir) q = MatchAllQuery.new() qf = QueryFilter.new(TermQuery.new(:switch, "on")) do_test_top_docs(searcher, q, [0,2,4,6,8], qf) # test again to test caching doesn't break it do_test_top_docs(searcher, q, [0,2,4,6,8], qf) qf = QueryFilter.new(TermQuery.new(:switch, "off")) do_test_top_docs(searcher, q, [1,3,5,7,9], qf) bits = qf.bits(searcher.reader) assert(bits[1]) assert(bits[3]) assert(bits[5]) assert(bits[7]) assert(bits[9]) assert(!bits[0]) assert(!bits[2]) assert(!bits[4]) assert(!bits[6]) assert(!bits[8]) end def test_filtered_query searcher = Searcher.new(@dir) q = MatchAllQuery.new() rf = RangeFilter.new(:int, :>= => "2", :<= => "6") rq = FilteredQuery.new(q, rf) qf = QueryFilter.new(TermQuery.new(:switch, "on")) do_test_top_docs(searcher, rq, [2,4,6], qf) query = FilteredQuery.new(rq, qf) rf2 = RangeFilter.new(:int, :>= => "3") do_test_top_docs(searcher, query, [4,6], rf2) end class CustomFilter def bits(ir) bv = Ferret::Utils::BitVector.new bv[0] = bv[2] = bv[4] = true bv end end def test_custom_filter searcher = Searcher.new(@dir) q = MatchAllQuery.new filt = CustomFilter.new do_test_top_docs(searcher, q, [0, 2, 4], filt) end def test_filter_proc searcher = Searcher.new(@dir) q = MatchAllQuery.new() filter_proc = lambda {|doc, score, s| (s[doc][:int] % 2) == 0} top_docs = searcher.search(q, :filter_proc => filter_proc) top_docs.hits.each do |hit| assert_equal(0, searcher[hit.doc][:int] % 2) end end def test_score_modifying_filter_proc searcher = Searcher.new(@dir) q = MatchAllQuery.new() start_date = Date.parse('2008-02-08') date_half_life_50 = lambda do |doc, score, s| days = (start_date - Date.parse(s[doc][:date], '%Y%m%d')).to_i 1.0 / (2.0 ** (days.to_f / 50.0)) end top_docs = searcher.search(q, :filter_proc => date_half_life_50) docs = top_docs.hits.collect {|hit| hit.doc} assert_equal(docs, [2,4,9,8,6,3,5,1,7,0]) rev_date_half_life_50 = lambda do |doc, score, s| days = (start_date - Date.parse(s[doc][:date], '%Y%m%d')).to_i 1.0 - 1.0 / (2.0 ** (days.to_f / 50.0)) end top_docs = searcher.search(q, :filter_proc => rev_date_half_life_50) docs = top_docs.hits.collect {|hit| hit.doc} assert_equal(docs, [0,7,1,3,5,6,8,9,2,4]) end end ferret-0.11.8.6/test/unit/search/tc_index_searcher.rb0000644000004100000410000000403012476264460022475 0ustar www-datawww-datarequire File.dirname(__FILE__) + "/../../test_helper" require File.dirname(__FILE__) + "/tm_searcher" class SearcherTest < Test::Unit::TestCase include Ferret::Search include Ferret::Store include Ferret::Analysis include Ferret::Index include SearcherTests def setup() @dir = RAMDirectory.new() iw = IndexWriter.new(:dir => @dir, :analyzer => WhiteSpaceAnalyzer.new(), :create => true) @documents = IndexTestHelper::SEARCH_TEST_DOCS @documents.each { |doc| iw << doc } iw.close() @searcher = Searcher.new(@dir) end def teardown() @searcher.close @dir.close() end def get_docs(hits) docs = [] hits.each do |hit| docs << hit.doc end docs end def check_hits(query, expected, top=nil, total_hits=nil) options = {} options[:limit] = expected.size + 1 if (expected.size > 10) top_docs = @searcher.search(query, options) assert_equal(expected.length, top_docs.hits.size) assert_equal(top, top_docs.hits[0].doc) if top if total_hits assert_equal(total_hits, top_docs.total_hits) else assert_equal(expected.length, top_docs.total_hits) end top_docs.hits.each do |score_doc| assert(expected.include?(score_doc.doc), "#{score_doc.doc} was found unexpectedly") assert(score_doc.score.approx_eql?(@searcher.explain(query, score_doc.doc).score), "Scores(#{score_doc.score} != #{@searcher.explain(query, score_doc.doc).score})") end assert_equal(expected.sort, @searcher.scan(query)) if expected.size > 5 assert_equal(expected[0...5], @searcher.scan(query, :limit => 5)) assert_equal(expected[5..-1], @searcher.scan(query, :start_doc => expected[5])) end end def test_get_doc() assert_equal(18, @searcher.max_doc) assert_equal("20050930", @searcher.get_document(0)[:date]) assert_equal("cat1/sub2/subsub2", @searcher.get_document(4)[:category]) assert_equal("20051012", @searcher.get_document(12)[:date]) end end ferret-0.11.8.6/test/unit/search/tm_searcher.rb0000644000004100000410000003606112476264460021331 0ustar www-datawww-datarequire File.dirname(__FILE__) + "/../../test_helper" module SearcherTests include Ferret::Search def test_term_query tq = TermQuery.new(:field, "word2") tq.boost = 100 check_hits(tq, [1,4,8]) #puts @searcher.explain(tq, 1) #puts @searcher.explain(tq, 4) #puts @searcher.explain(tq, 8) tq = TermQuery.new(:field, "2342") check_hits(tq, []) tq = TermQuery.new(:field, "") check_hits(tq, []) tq = TermQuery.new(:field, "word1") top_docs = @searcher.search(tq) assert_equal(@searcher.max_doc, top_docs.total_hits) assert_equal(10, top_docs.hits.size) top_docs = @searcher.search(tq, {:limit => 20}) assert_equal(@searcher.max_doc, top_docs.hits.size) assert_equal([Ferret::Term.new(:field, "word1")], tq.terms(@searcher)) end def check_docs(query, options, expected=[]) top_docs = @searcher.search(query, options) docs = top_docs.hits assert_equal(expected.length, docs.length) docs.length.times do |i| assert_equal(expected[i], docs[i].doc) end if options[:limit] == :all and options[:offset] == nil assert_equal(expected.sort, @searcher.scan(query)) end end def test_offset tq = TermQuery.new(:field, "word1") tq.boost = 100 top_docs = @searcher.search(tq, {:limit => 100}) expected = [] top_docs.hits.each do |sd| expected << sd.doc end assert_raise(ArgumentError) { @searcher.search(tq, {:offset => -1}) } assert_raise(ArgumentError) { @searcher.search(tq, {:limit => 0}) } assert_raise(ArgumentError) { @searcher.search(tq, {:limit => -1}) } check_docs(tq, {:limit => 8, :offset => 0}, expected[0,8]) check_docs(tq, {:limit => 3, :offset => 1}, expected[1,3]) check_docs(tq, {:limit => 6, :offset => 2}, expected[2,6]) check_docs(tq, {:limit => 2, :offset => expected.length}, []) check_docs(tq, {:limit => 2, :offset => expected.length + 100}, []) check_docs(tq, {:limit => :all}, expected) check_docs(tq, {:limit => :all, :offset => 2}, expected[2..-1]) end def test_multi_term_query mtq = MultiTermQuery.new(:field, :max_terms => 4, :min_score => 0.5) check_hits(mtq, []) assert_equal('""', mtq.to_s(:field)) assert_equal('field:""', mtq.to_s) [ ["brown", 1.0, '"brown"'], ["fox", 0.1, '"brown"'], ["fox", 0.6, '"fox^0.6|brown"'], ["fast", 50.0, '"fox^0.6|brown|fast^50.0"'] ].each do |term, boost, str| mtq.add_term(term, boost) assert_equal(str, mtq.to_s(:field)) assert_equal("field:#{str}", mtq.to_s()) end mtq.boost = 80.1 assert_equal('field:"fox^0.6|brown|fast^50.0"^80.1', mtq.to_s()) mtq << "word1" assert_equal('field:"fox^0.6|brown|word1|fast^50.0"^80.1', mtq.to_s()) mtq << "word2" assert_equal('field:"brown|word1|word2|fast^50.0"^80.1', mtq.to_s()) mtq << "word3" assert_equal('field:"brown|word1|word2|fast^50.0"^80.1', mtq.to_s()) terms = mtq.terms(@searcher) assert(terms.index(Ferret::Term.new(:field, "brown"))) assert(terms.index(Ferret::Term.new(:field, "word1"))) assert(terms.index(Ferret::Term.new(:field, "word2"))) assert(terms.index(Ferret::Term.new(:field, "fast"))) end def test_boolean_query bq = BooleanQuery.new() tq1 = TermQuery.new(:field, "word1") tq2 = TermQuery.new(:field, "word3") bq.add_query(tq1, :must) bq.add_query(tq2, :must) check_hits(bq, [2,3,6,8,11,14], 14) tq3 = TermQuery.new(:field, "word2") bq.add_query(tq3, :should) check_hits(bq, [2,3,6,8,11,14], 8) bq = BooleanQuery.new() bq.add_query(tq2, :must) bq.add_query(tq3, :must_not) check_hits(bq, [2,3,6,11,14]) bq = BooleanQuery.new() bq.add_query(tq2, :must_not) check_hits(bq, [0,1,4,5,7,9,10,12,13,15,16,17]) bq = BooleanQuery.new() bq.add_query(tq2, :should) bq.add_query(tq3, :should) check_hits(bq, [1,2,3,4,6,8,11,14]) bq = BooleanQuery.new() bc1 = BooleanQuery::BooleanClause.new(tq2, :should) bc2 = BooleanQuery::BooleanClause.new(tq3, :should) bq << bc1 bq << bc2 check_hits(bq, [1,2,3,4,6,8,11,14]) end def test_phrase_query() pq = PhraseQuery.new(:field) assert_equal("\"\"", pq.to_s(:field)) assert_equal("field:\"\"", pq.to_s) pq << "quick" << "brown" << "fox" check_hits(pq, [1]) pq = PhraseQuery.new(:field, 1) pq << "quick" pq.add_term("fox", 2) check_hits(pq, [1,11,14,16]) pq.slop = 0 check_hits(pq, [1,11,14]) pq.slop = 1 check_hits(pq, [1,11,14,16]) pq.slop = 4 check_hits(pq, [1,11,14,16,17]) end def test_range_query() rq = RangeQuery.new(:date, :lower => "20051006", :upper => "20051010") check_hits(rq, [6,7,8,9,10]) rq = RangeQuery.new(:date, :>= => "20051006", :<= => "20051010") check_hits(rq, [6,7,8,9,10]) rq = RangeQuery.new(:date, :lower => "20051006", :upper => "20051010", :include_lower => false) check_hits(rq, [7,8,9,10]) rq = RangeQuery.new(:date, :> => "20051006", :<= => "20051010") check_hits(rq, [7,8,9,10]) rq = RangeQuery.new(:date, :lower => "20051006", :upper => "20051010", :include_upper => false) check_hits(rq, [6,7,8,9]) rq = RangeQuery.new(:date, :>= => "20051006", :< => "20051010") check_hits(rq, [6,7,8,9]) rq = RangeQuery.new(:date, :lower => "20051006", :upper => "20051010", :include_lower => false, :include_upper => false) check_hits(rq, [7,8,9]) rq = RangeQuery.new(:date, :> => "20051006", :< => "20051010") check_hits(rq, [7,8,9]) rq = RangeQuery.new(:date, :upper => "20051003") check_hits(rq, [0,1,2,3]) rq = RangeQuery.new(:date, :<= => "20051003") check_hits(rq, [0,1,2,3]) rq = RangeQuery.new(:date, :upper => "20051003", :include_upper => false) check_hits(rq, [0,1,2]) rq = RangeQuery.new(:date, :< => "20051003") check_hits(rq, [0,1,2]) rq = RangeQuery.new(:date, :lower => "20051014") check_hits(rq, [14,15,16,17]) rq = RangeQuery.new(:date, :>= => "20051014") check_hits(rq, [14,15,16,17]) rq = RangeQuery.new(:date, :lower => "20051014", :include_lower => false) check_hits(rq, [15,16,17]) rq = RangeQuery.new(:date, :> => "20051014") check_hits(rq, [15,16,17]) end def test_typed_range_query() rq = TypedRangeQuery.new(:number, :>= => "-1.0", :<= => 1.0) check_hits(rq, [0,1,4,10,15,17]) rq = TypedRangeQuery.new(:number, :> => "-1.0", :< => 1.0) check_hits(rq, [0,1,4,15]) if ENV['FERRET_DEV'] # text hexadecimal rq = TypedRangeQuery.new(:number, :> => "1.0", :<= =>"0xa") check_hits(rq, [6,7,9,12]) end # test single bound rq = TypedRangeQuery.new(:number, :<= => "0.0") check_hits(rq, [5,11,15,16,17]) # test single bound rq = TypedRangeQuery.new(:number, :> => "0.0") check_hits(rq, [0,1,2,3,4,6,7,8,9,10,12,13,14]) # below range - no results rq = TypedRangeQuery.new(:number, :> => "10051006", :< =>"10051010") check_hits(rq, []) # above range - no results rq = TypedRangeQuery.new(:number, :> => "-12518421", :< =>"-12518420") check_hits(rq, []) end def test_prefix_query() pq = PrefixQuery.new(:category, "cat1") check_hits(pq, [0, 1, 2, 3, 4, 13, 14, 15, 16, 17]) pq = PrefixQuery.new(:category, "cat1/sub2") check_hits(pq, [3, 4, 13, 15]) end def test_wildcard_query() wq = WildcardQuery.new(:category, "cat1*") check_hits(wq, [0, 1, 2, 3, 4, 13, 14, 15, 16, 17]) wq = WildcardQuery.new(:category, "cat1*/su??ub2") check_hits(wq, [4, 16]) wq = WildcardQuery.new(:category, "*/sub2*") check_hits(wq, [3, 4, 13, 15]) end def test_multi_phrase_query() mpq = PhraseQuery.new(:field) mpq << ["quick", "fast"] mpq << ["brown", "red", "hairy"] mpq << "fox" check_hits(mpq, [1, 8, 11, 14]) mpq.slop = 4 check_hits(mpq, [1, 8, 11, 14, 16, 17]) end def test_highlighter() dir = Ferret::Store::RAMDirectory.new iw = Ferret::Index::IndexWriter.new(:dir => dir, :analyzer => Ferret::Analysis::WhiteSpaceAnalyzer.new()) long_text = "big " + "between " * 2000 + 'house' [ {:field => "the words we are searching for are one and two also " + "sometimes looking for them as a phrase like this; one " + "two lets see how it goes"}, {:long => 'before ' * 1000 + long_text + ' after' * 1000}, {:dates => '20070505 20071230 20060920 20081111'}, ].each {|doc| iw << doc } iw.close searcher = Searcher.new(dir) q = TermQuery.new(:field, "one"); highlights = searcher.highlight(q, 0, :field, :excerpt_length => 10, :num_excerpts => 1) assert_equal(1, highlights.size) assert_equal("...are one...", highlights[0]) highlights = searcher.highlight(q, 0, :field, :excerpt_length => 10, :num_excerpts => 2) assert_equal(2, highlights.size) assert_equal("...are one...", highlights[0]) assert_equal("...this; one...", highlights[1]) highlights = searcher.highlight(q, 0, :field, :excerpt_length => 10, :num_excerpts => 3) assert_equal(3, highlights.size) assert_equal("the words...", highlights[0]) assert_equal("...are one...", highlights[1]) assert_equal("...this; one...", highlights[2]) highlights = searcher.highlight(q, 0, :field, :excerpt_length => 10, :num_excerpts => 4) assert_equal(3, highlights.size) assert_equal("the words we are...", highlights[0]) assert_equal("...are one...", highlights[1]) assert_equal("...this; one...", highlights[2]) highlights = searcher.highlight(q, 0, :field, :excerpt_length => 10, :num_excerpts => 5) assert_equal(2, highlights.size) assert_equal("the words we are searching for are one...", highlights[0]) assert_equal("...this; one...", highlights[1]) highlights = searcher.highlight(q, 0, :field, :excerpt_length => 10, :num_excerpts => 20) assert_equal(1, highlights.size) assert_equal("the words we are searching for are one and two also " + "sometimes looking for them as a phrase like this; one " + "two lets see how it goes", highlights[0]) highlights = searcher.highlight(q, 0, :field, :excerpt_length => 1000, :num_excerpts => 1) assert_equal(1, highlights.size) assert_equal("the words we are searching for are one and two also " + "sometimes looking for them as a phrase like this; one " + "two lets see how it goes", highlights[0]) q = BooleanQuery.new(false) q << TermQuery.new(:field, "one") q << TermQuery.new(:field, "two") highlights = searcher.highlight(q, 0, :field, :excerpt_length => 15, :num_excerpts => 2) assert_equal(2, highlights.size) assert_equal("...one and two...", highlights[0]) assert_equal("...this; one two...", highlights[1]) q << (PhraseQuery.new(:field) << "one" << "two") highlights = searcher.highlight(q, 0, :field, :excerpt_length => 15, :num_excerpts => 2) assert_equal(2, highlights.size) assert_equal("...one and two...", highlights[0]) assert_equal("...this; one two...", highlights[1]) highlights = searcher.highlight(q, 0, :field, :excerpt_length => 15, :num_excerpts => 1) assert_equal(1, highlights.size) # should have a higher priority since it the merger of three matches assert_equal("...this; one two...", highlights[0]) highlights = searcher.highlight(q, 0, :not_a_field, :excerpt_length => 15, :num_excerpts => 1) assert_nil(highlights) q = TermQuery.new(:wrong_field, "one") highlights = searcher.highlight(q, 0, :wrong_field, :excerpt_length => 15, :num_excerpts => 1) assert_nil(highlights) q = BooleanQuery.new(false) q << (PhraseQuery.new(:field) << "the" << "words") q << (PhraseQuery.new(:field) << "for" << "are" << "one" << "and" << "two") q << TermQuery.new(:field, "words") q << TermQuery.new(:field, "one") q << TermQuery.new(:field, "two") highlights = searcher.highlight(q, 0, :field, :excerpt_length => 10, :num_excerpts => 1) assert_equal(1, highlights.size) assert_equal("the words...", highlights[0]) highlights = searcher.highlight(q, 0, :field, :excerpt_length => 10, :num_excerpts => 2) assert_equal(2, highlights.size) assert_equal("the words...", highlights[0]) assert_equal("...one two...", highlights[1]) [ [RangeQuery.new(:dates, :>= => '20081111'), '20070505 20071230 20060920 20081111'], [RangeQuery.new(:dates, :>= => '20070101'), '20070505 20071230 20060920 20081111'], [PrefixQuery.new(:dates, '2007'), '20070505 20071230 20060920 20081111'], ].each do |query, expected| assert_equal([expected], searcher.highlight(query, 2, :dates)) end #q = PhraseQuery.new(:long) << 'big' << 'house' #q.slop = 4000 #highlights = searcher.highlight(q, 1, :long, # :excerpt_length => 400, # :num_excerpts => 2) #assert_equal(1, highlights.size) #puts highlights[0] #assert_equal("the words...", highlights[0]) #assert_equal("...one two...", highlights[1]) end def test_highlighter_with_standard_analyzer() dir = Ferret::Store::RAMDirectory.new iw = Ferret::Index::IndexWriter.new(:dir => dir, :analyzer => Ferret::Analysis::StandardAnalyzer.new()) [ {:field => "field has a url http://ferret.davebalmain.com/trac/ end"}, ].each {|doc| iw << doc } iw.close searcher = Searcher.new(dir) q = TermQuery.new(:field, "ferret.davebalmain.com/trac"); highlights = searcher.highlight(q, 0, :field, :excerpt_length => 1000, :num_excerpts => 1) assert_equal(1, highlights.size) assert_equal("field has a url http://ferret.davebalmain.com/trac/ end", highlights[0]) end end ferret-0.11.8.6/test/unit/search/tc_sort.rb0000644000004100000410000000274612476264460020515 0ustar www-datawww-datarequire File.dirname(__FILE__) + "/../../test_helper" class SortTest < Test::Unit::TestCase include Ferret::Search def test_basic() s = Sort::RELEVANCE assert_equal(2, s.fields.size) assert_equal(SortField::SCORE, s.fields[0]) assert_equal(SortField::DOC_ID, s.fields[1]) s = Sort::INDEX_ORDER assert_equal(1, s.fields.size) assert_equal(SortField::DOC_ID, s.fields[0]) end def test_string_init() s = Sort.new(:field) assert_equal(2, s.fields.size) assert_equal(:auto, s.fields[0].type) assert_equal(:field, s.fields[0].name) assert_equal(SortField::DOC_ID, s.fields[1]) s = Sort.new([:field1, :field2, :field3]) assert_equal(4, s.fields.size) assert_equal(:auto, s.fields[0].type) assert_equal(:field1, s.fields[0].name) assert_equal(:auto, s.fields[1].type) assert_equal(:field2, s.fields[1].name) assert_equal(:auto, s.fields[2].type) assert_equal(:field3, s.fields[2].name) assert_equal(SortField::DOC_ID, s.fields[3]) end def test_multi_fields() sf1 = SortField.new(:field, {:type => :integer, :reverse => true}) sf2 = SortField::SCORE sf3 = SortField::DOC_ID s = Sort.new([sf1, sf2, sf3]) assert_equal(3, s.fields.size) assert_equal(:integer, s.fields[0].type) assert_equal(:field, s.fields[0].name) assert(s.fields[0].reverse?) assert_equal(SortField::SCORE, s.fields[1]) assert_equal(SortField::DOC_ID, s.fields[2]) end end ferret-0.11.8.6/test/unit/search/tc_sort_field.rb0000644000004100000410000000121512476264460021646 0ustar www-datawww-datarequire File.dirname(__FILE__) + "/../../test_helper" class SortFieldTest < Test::Unit::TestCase include Ferret::Search def test_field_score() fs = SortField::SCORE assert_equal(:score, fs.type) assert_nil(fs.name) assert(!fs.reverse?, "SCORE_ID should not be reverse") assert_nil(fs.comparator) end def test_field_doc() fs = SortField::DOC_ID assert_equal(:doc_id, fs.type) assert_nil(fs.name) assert(!fs.reverse?, "DOC_ID should be reverse") assert_nil(fs.comparator) end def test_error_raised() assert_raise(ArgumentError) { fs = SortField.new(nil, :type => :integer) } end end ferret-0.11.8.6/test/unit/search/tc_spans.rb0000644000004100000410000001642112476264460020645 0ustar www-datawww-datarequire File.dirname(__FILE__) + "/../../test_helper" class SpansBasicTest < Test::Unit::TestCase include Ferret::Search include Ferret::Store include Ferret::Index include Ferret::Search::Spans include Ferret::Analysis def setup() @dir = RAMDirectory.new iw = IndexWriter.new(:dir => @dir, :analyzer => WhiteSpaceAnalyzer.new(), :create => true) [ "start finish one two three four five six seven", "start one finish two three four five six seven", "start one two finish three four five six seven flip", "start one two three finish four five six seven", "start one two three four finish five six seven", "start one two three four five finish six seven", "start one two three four five six finish seven eight", "start one two three four five six seven finish eight nine", "start one two three four five six finish seven eight", "start one two three four five finish six seven", "start one two three four finish five six seven", "start one two three finish four five six seven", "start one two finish three four five six seven flop", "start one finish two three four five six seven", "start finish one two three four five six seven", "start start one two three four five six seven", "finish start one two three four five six seven", "finish one start two three four five six seven toot", "finish one two start three four five six seven", "finish one two three start four five six seven", "finish one two three four start five six seven", "finish one two three four five start six seven", "finish one two three four five six start seven eight", "finish one two three four five six seven start eight nine", "finish one two three four five six start seven eight", "finish one two three four five start six seven", "finish one two three four start five six seven", "finish one two three start four five six seven", "finish one two start three four five six seven", "finish one start two three four five six seven", "finish start one two three four five six seven" ].each { |line| iw << {:field => line} } iw.close() @searcher = Searcher.new(@dir) end def teardown() @searcher.close @dir.close end def number_split(i) if (i < 10) return "<#{i}>" elsif (i < 100) return "<#{((i/10)*10)}> <#{i%10}>" else return "<#{((i/100)*100)}> <#{(((i%100)/10)*10)}> <#{i%10}>" end end def check_hits(query, expected, test_explain = false, top=nil) top_docs = @searcher.search(query, {:limit => expected.length + 1}) assert_equal(expected.length, top_docs.hits.size) assert_equal(top, top_docs.hits[0].doc) if top assert_equal(expected.length, top_docs.total_hits) top_docs.hits.each do |hit| assert(expected.include?(hit.doc), "#{hit.doc} was found unexpectedly") if test_explain assert(hit.score.approx_eql?(@searcher.explain(query, hit.doc).score), "Scores(#{hit.score} != " + "#{@searcher.explain(query, hit.doc).score})") end end end def test_span_term_query() tq = SpanTermQuery.new(:field, "nine") check_hits(tq, [7,23], true) tq = SpanTermQuery.new(:field, "eight") check_hits(tq, [6,7,8,22,23,24]) end def test_span_multi_term_query() tq = SpanMultiTermQuery.new(:field, ["eight", "nine"]) check_hits(tq, [6,7,8,22,23,24], true) tq = SpanMultiTermQuery.new(:field, ["flip", "flop", "toot", "nine"]) check_hits(tq, [2,7,12,17,23]) end def test_span_prefix_query() tq = SpanPrefixQuery.new(:field, "fl") check_hits(tq, [2, 12], true) end def test_span_near_query() tq1 = SpanTermQuery.new(:field, "start") tq2 = SpanTermQuery.new(:field, "finish") q = SpanNearQuery.new(:clauses => [tq1, tq2], :in_order => true) check_hits(q, [0,14], true) q = SpanNearQuery.new() q << tq1 << tq2 check_hits(q, [0,14,16,30], true) q = SpanNearQuery.new(:clauses => [tq1, tq2], :slop => 1, :in_order => true) check_hits(q, [0,1,13,14]) q = SpanNearQuery.new(:clauses => [tq1, tq2], :slop => 1) check_hits(q, [0,1,13,14,16,17,29,30]) q = SpanNearQuery.new(:clauses => [tq1, tq2], :slop => 4, :in_order => true) check_hits(q, [0,1,2,3,4,10,11,12,13,14]) q = SpanNearQuery.new(:clauses => [tq1, tq2], :slop => 4) check_hits(q, [0,1,2,3,4,10,11,12,13,14,16,17,18,19,20,26,27,28,29,30]) q = SpanNearQuery.new(:clauses => [ SpanPrefixQuery.new(:field, 'se'), SpanPrefixQuery.new(:field, 'fl')], :slop => 0) check_hits(q, [2, 12], true) end def test_span_not_query() tq1 = SpanTermQuery.new(:field, "start") tq2 = SpanTermQuery.new(:field, "finish") tq3 = SpanTermQuery.new(:field, "two") tq4 = SpanTermQuery.new(:field, "five") nearq1 = SpanNearQuery.new(:clauses => [tq1, tq2], :slop => 4, :in_order => true) nearq2 = SpanNearQuery.new(:clauses => [tq3, tq4], :slop => 4, :in_order => true) q = SpanNotQuery.new(nearq1, nearq2) check_hits(q, [0,1,13,14], true) nearq1 = SpanNearQuery.new(:clauses => [tq1, tq2], :slop => 4) q = SpanNotQuery.new(nearq1, nearq2) check_hits(q, [0,1,13,14,16,17,29,30]) nearq1 = SpanNearQuery.new(:clauses => [tq1, tq3], :slop => 4, :in_order => true) nearq2 = SpanNearQuery.new(:clauses => [tq2, tq4], :slop => 8) q = SpanNotQuery.new(nearq1, nearq2) check_hits(q, [2,3,4,5,6,7,8,9,10,11,12,15]) end def test_span_first_query() finish_first = [16,17,18,19,20,21,22,23,24,25,26,27,28,29,30] tq = SpanTermQuery.new(:field, "finish") q = SpanFirstQuery.new(tq, 1) check_hits(q, finish_first, true) q = SpanFirstQuery.new(tq, 5) check_hits(q, [0,1,2,3,11,12,13,14]+finish_first, false) end def test_span_or_query_query() tq1 = SpanTermQuery.new(:field, "start") tq2 = SpanTermQuery.new(:field, "finish") tq3 = SpanTermQuery.new(:field, "five") nearq1 = SpanNearQuery.new(:clauses => [tq1, tq2], :slop => 1, :in_order => true) nearq2 = SpanNearQuery.new(:clauses => [tq2, tq3], :slop => 0) q = SpanOrQuery.new([nearq1, nearq2]) check_hits(q, [0,1,4,5,9,10,13,14], false) nearq1 = SpanNearQuery.new(:clauses => [tq1, tq2], :slop => 0) nearq2 = SpanNearQuery.new(:clauses => [tq2, tq3], :slop => 1) q = SpanOrQuery.new([nearq1, nearq2]) check_hits(q, [0,3,4,5,6,8,9,10,11,14,16,30], false) end def test_span_prefix_query_max_terms @dir = RAMDirectory.new iw = IndexWriter.new(:dir => @dir, :analyzer => WhiteSpaceAnalyzer.new()) 2000.times { |i| iw << {:field => "prefix#{i} term#{i}"} } iw.close() @searcher = Searcher.new(@dir) pq = SpanPrefixQuery.new(:field, "prefix") tq = SpanTermQuery.new(:field, "term1500") q = SpanNearQuery.new(:clauses => [pq, tq], :in_order => true) check_hits(q, [], false) pq = SpanPrefixQuery.new(:field, "prefix", 2000) q = SpanNearQuery.new(:clauses => [pq, tq], :in_order => true) check_hits(q, [1500], false) end end ferret-0.11.8.6/test/unit/search/tc_multiple_search_requests.rb0000644000004100000410000000310612476264460024630 0ustar www-datawww-datarequire File.dirname(__FILE__) + "/../../test_helper" class MultipleSearchRequestsTest < Test::Unit::TestCase include Ferret::Search include Ferret::Store include Ferret::Analysis include Ferret::Index def setup() dpath = File.expand_path(File.join(File.dirname(__FILE__), '../../temp/fsdir')) fs_dir = Ferret::Store::FSDirectory.new(dpath, true) iw = IndexWriter.new(:dir => fs_dir, :create => true, :key => [:id]) 1000.times do |x| doc = {:id => x} iw << doc end iw.close() fs_dir.close() @ix = Index.new(:path => dpath, :create => true, :key => [:id]) end def tear_down() @ix.close end def test_repeated_queries_segmentation_fault 1000.times do |x| bq = BooleanQuery.new() tq1 = TermQuery.new(:id, 1) tq2 = TermQuery.new(:another_id, 1) bq.add_query(tq1, :must) bq.add_query(tq2, :must) top_docs = @ix.search(bq) end end def test_repeated_queries_bus_error 1000.times do |x| bq = BooleanQuery.new() tq1 = TermQuery.new(:id, '1') tq2 = TermQuery.new(:another_id, '1') tq3 = TermQuery.new(:yet_another_id, '1') tq4 = TermQuery.new(:still_another_id, '1') tq5 = TermQuery.new(:one_more_id, '1') tq6 = TermQuery.new(:and_another_id, '1') bq.add_query(tq1, :must) bq.add_query(tq2, :must) bq.add_query(tq3, :must) bq.add_query(tq4, :must) bq.add_query(tq5, :must) bq.add_query(tq6, :must) top_docs = @ix.search(bq) end end end ferret-0.11.8.6/test/unit/search/tc_fuzzy_query.rb0000644000004100000410000001033412476264460022132 0ustar www-datawww-datarequire File.dirname(__FILE__) + "/../../test_helper" class FuzzyQueryTest < Test::Unit::TestCase include Ferret::Search include Ferret::Store include Ferret::Analysis include Ferret::Index def add_doc(text, writer) writer << {:field => text} end def setup() @dir = RAMDirectory.new() end def teardown() @dir.close() end def do_test_top_docs(is, query, expected) top_docs = is.search(query) assert_equal(expected.length, top_docs.total_hits, "expected #{expected.length} hits but got #{top_docs.total_hits}") assert_equal(expected.length, top_docs.hits.size) top_docs.total_hits.times do |i| assert_equal(expected[i], top_docs.hits[i].doc) end end def do_prefix_test(is, text, prefix, expected) fq = FuzzyQuery.new(:field, text, :prefix_length => prefix) #puts is.explain(fq, 0) #puts is.explain(fq, 1) do_test_top_docs(is, fq, expected) end def test_fuzziness() iw = IndexWriter.new(:dir => @dir, :analyzer => WhiteSpaceAnalyzer.new(), :create => true) add_doc("aaaaa", iw) add_doc("aaaab", iw) add_doc("aaabb", iw) add_doc("aabbb", iw) add_doc("abbbb", iw) add_doc("bbbbb", iw) add_doc("ddddd", iw) add_doc("ddddddddddddddddddddd", iw) # test max_distances problem add_doc("aaaaaaaaaaaaaaaaaaaaaaa", iw) # test max_distances problem #iw.optimize() iw.close() is = Searcher.new(@dir) fq = FuzzyQuery.new(:field, "aaaaa", :prefix_length => 5) do_prefix_test(is, "aaaaaaaaaaaaaaaaaaaaaa", 1, [8]) do_prefix_test(is, "aaaaa", 0, [0,1,2]) do_prefix_test(is, "aaaaa", 1, [0,1,2]) do_prefix_test(is, "aaaaa", 2, [0,1,2]) do_prefix_test(is, "aaaaa", 3, [0,1,2]) do_prefix_test(is, "aaaaa", 4, [0,1]) do_prefix_test(is, "aaaaa", 5, [0]) do_prefix_test(is, "aaaaa", 6, [0]) do_prefix_test(is, "xxxxx", 0, []) do_prefix_test(is, "aaccc", 0, []) do_prefix_test(is, "aaaac", 0, [0,1,2]) do_prefix_test(is, "aaaac", 1, [0,1,2]) do_prefix_test(is, "aaaac", 2, [0,1,2]) do_prefix_test(is, "aaaac", 3, [0,1,2]) do_prefix_test(is, "aaaac", 4, [0,1]) do_prefix_test(is, "aaaac", 5, []) do_prefix_test(is, "ddddX", 0, [6]) do_prefix_test(is, "ddddX", 1, [6]) do_prefix_test(is, "ddddX", 2, [6]) do_prefix_test(is, "ddddX", 3, [6]) do_prefix_test(is, "ddddX", 4, [6]) do_prefix_test(is, "ddddX", 5, []) fq = FuzzyQuery.new(:anotherfield, "ddddX", :prefix_length => 0) top_docs = is.search(fq) assert_equal(0, top_docs.total_hits) is.close() end def test_fuzziness_long() iw = IndexWriter.new(:dir => @dir, :analyzer => WhiteSpaceAnalyzer.new(), :create => true) add_doc("aaaaaaa", iw) add_doc("segment", iw) iw.optimize() iw.close() is = Searcher.new(@dir) # not similar enough: do_prefix_test(is, "xxxxx", 0, []) # edit distance to "aaaaaaa" = 3, this matches because the string is longer than # in testDefaultFuzziness so a bigger difference is allowed: do_prefix_test(is, "aaaaccc", 0, [0]) # now with prefix do_prefix_test(is, "aaaaccc", 1, [0]) do_prefix_test(is, "aaaaccc", 4, [0]) do_prefix_test(is, "aaaaccc", 5, []) # no match, more than half of the characters is wrong: do_prefix_test(is, "aaacccc", 0, []) # now with prefix do_prefix_test(is, "aaacccc", 1, []) # "student" and "stellent" are indeed similar to "segment" by default: do_prefix_test(is, "student", 0, [1]) do_prefix_test(is, "stellent", 0, [1]) # now with prefix do_prefix_test(is, "student", 2, []) do_prefix_test(is, "stellent", 2, []) # "student" doesn't match anymore thanks to increased minimum similarity: fq = FuzzyQuery.new(:field, "student", :min_similarity => 0.6, :prefix_length => 0) top_docs = is.search(fq) assert_equal(0, top_docs.total_hits) assert_raise(ArgumentError) do fq = FuzzyQuery.new(:f, "s", :min_similarity => 1.1) end assert_raise(ArgumentError) do fq = FuzzyQuery.new(:f, "s", :min_similarity => -0.1) end is.close() end end ferret-0.11.8.6/test/unit/search/tc_multi_searcher.rb0000644000004100000410000001065112476264460022526 0ustar www-datawww-datarequire File.dirname(__FILE__) + "/../../test_helper" require File.join(File.dirname(__FILE__), "tc_index_searcher.rb") # make sure a MultiSearcher searching only one index # passes all the Searcher tests class SimpleMultiSearcherTest < SearcherTest alias :old_setup :setup def setup() old_setup @searcher = MultiSearcher.new([Searcher.new(@dir)]) end end # checks query results of a multisearcher searching two indexes # against those of a single indexsearcher searching the same # set of documents class MultiSearcherTest < Test::Unit::TestCase include Ferret::Search include Ferret::Store include Ferret::Analysis include Ferret::Index include SearcherTests DOCUMENTS1 = [ {"date" => "20050930", :field => "word1", "cat" => "cat1/"}, {"date" => "20051001", :field => "word1 word2 the quick brown fox", "cat" => "cat1/sub1"}, {"date" => "20051002", :field => "word1 word3", "cat" => "cat1/sub1/subsub1"}, {"date" => "20051003", :field => "word1 word3", "cat" => "cat1/sub2"}, {"date" => "20051004", :field => "word1 word2", "cat" => "cat1/sub2/subsub2"}, {"date" => "20051005", :field => "word1", "cat" => "cat2/sub1"}, {"date" => "20051006", :field => "word1 word3", "cat" => "cat2/sub1"}, {"date" => "20051007", :field => "word1", "cat" => "cat2/sub1"}, {"date" => "20051008", :field => "word1 word2 word3 the fast brown fox", "cat" => "cat2/sub1"} ] DOCUMENTS2 = [ {"date" => "20051009", :field => "word1", "cat" => "cat3/sub1"}, {"date" => "20051010", :field => "word1", "cat" => "cat3/sub1"}, {"date" => "20051011", :field => "word1 word3 the quick red fox", "cat" => "cat3/sub1"}, {"date" => "20051012", :field => "word1", "cat" => "cat3/sub1"}, {"date" => "20051013", :field => "word1", "cat" => "cat1/sub2"}, {"date" => "20051014", :field => "word1 word3 the quick hairy fox", "cat" => "cat1/sub1"}, {"date" => "20051015", :field => "word1", "cat" => "cat1/sub2/subsub1"}, {"date" => "20051016", :field => "word1 the quick fox is brown and hairy and a little red", "cat" => "cat1/sub1/subsub2"}, {"date" => "20051017", :field => "word1 the brown fox is quick and red", "cat" => "cat1/"} ] def setup() # create MultiSearcher from two seperate searchers dir1 = RAMDirectory.new() iw1 = IndexWriter.new(:dir => dir1, :analyzer => WhiteSpaceAnalyzer.new(), :create => true) DOCUMENTS1.each { |doc| iw1 << doc } iw1.close() dir2 = RAMDirectory.new() iw2 = IndexWriter.new(:dir => dir2, :analyzer => WhiteSpaceAnalyzer.new(), :create => true) DOCUMENTS2.each { |doc| iw2 << doc } iw2.close() @searcher = Ferret::Search::MultiSearcher.new([Searcher.new(dir1), Searcher.new(dir2)]) # create single searcher dir = RAMDirectory.new iw = IndexWriter.new(:dir => dir, :analyzer => WhiteSpaceAnalyzer.new(), :create => true) DOCUMENTS1.each { |doc| iw << doc } DOCUMENTS2.each { |doc| iw << doc } iw.close @single = Searcher.new(dir) #@query_parser = Ferret::QueryParser.new([:date, :field, :cat], :analyzer => WhiteSpaceAnalyzer.new()) end def teardown() @searcher.close @single.close end def check_hits(query, ignore1, ignore2 = nil, ignore3 = nil) multi_docs = @searcher.search(query) single_docs = @single.search(query) assert_equal(single_docs.hits.size, multi_docs.hits.size, 'hit count') assert_equal(single_docs.total_hits, multi_docs.total_hits, 'hit count') multi_docs.hits.each_with_index { |sd, id| assert_equal(single_docs.hits[id].doc, sd.doc) assert(single_docs.hits[id].score.approx_eql?(sd.score), "#{single_docs.hits[id]} != #{sd.score}") } end def test_get_doc() assert_equal(18, @searcher.max_doc) assert_equal("20050930", @searcher.get_document(0)[:date]) assert_equal("cat1/sub2/subsub2", @searcher[4][:cat]) assert_equal("20051012", @searcher.get_document(12)[:date]) assert_equal(18, @single.max_doc) assert_equal("20050930", @single.get_document(0)[:date]) assert_equal("cat1/sub2/subsub2", @single[4][:cat]) assert_equal("20051012", @single.get_document(12)[:date]) end end ferret-0.11.8.6/test/unit/store/0000755000004100000410000000000012476264460016371 5ustar www-datawww-dataferret-0.11.8.6/test/unit/store/tc_fs_store.rb0000755000004100000410000000611712476264460021240 0ustar www-datawww-datarequire File.dirname(__FILE__) + "/../../test_helper" require File.dirname(__FILE__) + "/tm_store" require File.dirname(__FILE__) + "/tm_store_lock" require 'fileutils' class FSStoreTest < Test::Unit::TestCase include Ferret::Store include StoreTest include StoreLockTest def setup @dpath = File.expand_path(File.join(File.dirname(__FILE__), '../../temp/fsdir')) @dir = FSDirectory.new(@dpath, true) end def teardown @dir.close() Dir[File.join(@dpath, "*")].each {|path| begin File.delete(path) rescue nil end} end def test_fslock lock_name = "_file.f1" lock_file_path = make_lock_file_path(lock_name) assert(! File.exists?(lock_file_path), "There should be no lock file") lock = @dir.make_lock(lock_name) assert(! File.exists?(lock_file_path), "There should still be no lock file") assert(! lock.locked?, "lock shouldn't be locked yet") lock.obtain assert(lock.locked?, "lock should now be locked") assert(File.exists?(lock_file_path), "A lock file should have been created") assert(@dir.exists?(lfname(lock_name)),"The lock should exist") lock.release assert(! lock.locked?, "lock should be freed again") assert(! File.exists?(lock_file_path), "The lock file should have been deleted") end # def make_and_loose_lock # lock = @dir.make_lock("finalizer_lock") # lock.obtain # lock = nil # end # # def test_fslock_finalizer # lock_name = "finalizer_lock" # lock_file_path = make_lock_file_path(lock_name) # assert(! File.exists?(lock_file_path), "There should be no lock file") # # make_and_loose_lock # # #assert(File.exists?(lock_file_path), "There should now be a lock file") # # lock = @dir.make_lock(lock_name) # assert(lock.locked?, "lock should now be locked") # # GC.start # # assert(! lock.locked?, "lock should be freed again") # assert(! File.exists?(lock_file_path), "The lock file should have been deleted") # end # def test_permissions _S_IRGRP = 0040 _S_IWGRP = 0020 dpath = File.expand_path(File.join(File.dirname(__FILE__), '../../temp/fsdir_permissions')) FileUtils.mkdir_p(dpath) dstat = File.stat(dpath) File.chown(nil, `id -G`.split.last.to_i, dpath) File.chmod(dstat.mode | _S_IRGRP | _S_IWGRP, dpath) dir = FSDirectory.new(dpath, true) file_name = 'test_permissions' file_path = File.join(dpath, file_name) dir.touch(file_name) mode = File.stat(file_path).mode assert(mode & _S_IRGRP == _S_IRGRP, "file should be group-readable") assert(mode & _S_IWGRP == _S_IWGRP, "file should be group-writable") ensure if dstat File.chown(nil, dstat.gid, dpath) File.chmod(dstat.mode, dpath) end if dir dir.refresh() dir.close() end end def make_lock_file_path(name) lock_file_path = File.join(@dpath, lfname(name)) if File.exists?(lock_file_path) then File.delete(lock_file_path) end return lock_file_path end def lfname(name) "ferret-#{name}.lck" end end ferret-0.11.8.6/test/unit/store/tm_store_lock.rb0000755000004100000410000000300012476264460021556 0ustar www-datawww-datamodule StoreLockTest class Switch def Switch.counter() return @@counter end def Switch.counter=(counter) @@counter = counter end end def test_locking() lock_time_out = 0.001 # we want this test to run quickly lock1 = @dir.make_lock("l.lck") lock2 = @dir.make_lock("l.lck") assert(!lock2.locked?) assert(lock1.obtain(lock_time_out)) assert(lock2.locked?) assert(! can_obtain_lock?(lock2, lock_time_out)) exception_thrown = false begin lock2.while_locked(lock_time_out) do assert(false, "lock should not have been obtained") end rescue exception_thrown = true ensure assert(exception_thrown) end lock1.release() assert(lock2.obtain(lock_time_out)) lock2.release() Switch.counter = 0 t = Thread.new() do lock1.while_locked(lock_time_out) do Switch.counter = 1 # make sure lock2 obtain test was run while Switch.counter < 2 end Switch.counter = 3 end end t.run() #make sure thread has started and lock been obtained while Switch.counter < 1 end assert(! can_obtain_lock?(lock2, lock_time_out), "lock 2 should not be obtainable") Switch.counter = 2 while Switch.counter < 3 end assert(lock2.obtain(lock_time_out)) lock2.release() end def can_obtain_lock?(lock, lock_time_out) begin lock.obtain(lock_time_out) return true rescue Exception=>e end return false end end ferret-0.11.8.6/test/unit/store/tc_ram_store.rb0000755000004100000410000000213312476264460021401 0ustar www-datawww-datarequire File.dirname(__FILE__) + "/../../test_helper" require File.dirname(__FILE__) + "/tm_store" require File.dirname(__FILE__) + "/tm_store_lock" class RAMStoreTest < Test::Unit::TestCase include StoreTest include StoreLockTest def setup @dir = Ferret::Store::RAMDirectory.new end def teardown @dir.close() end def test_ramlock name = "lfile" lfile = Ferret::Store::Directory::LOCK_PREFIX + name + ".lck" assert(! @dir.exists?(lfile), "There should be no lock file") lock = @dir.make_lock(name) assert(! @dir.exists?(lfile), "There should still be no lock file") assert(! @dir.exists?(lfile), "The lock should be hidden by the FSDirectories directory scan") assert(! lock.locked?, "lock shouldn't be locked yet") lock.obtain assert(lock.locked?, "lock should now be locked") assert(@dir.exists?(lfile), "A lock file should have been created") lock.release assert(! lock.locked?, "lock should be freed again") assert(! @dir.exists?(lfile), "The lock file should have been deleted") end end ferret-0.11.8.6/test/unit/store/tm_store.rb0000755000004100000410000000236112476264460020557 0ustar www-datawww-datamodule StoreTest # declare dir so inheritors can access it. attr_accessor :dir # test the basic file manipulation methods; # - exists? # - touch # - delete # - file_count def test_basic_file_ops assert_equal(0, @dir.file_count(), "directory should be empty") assert(! @dir.exists?('filename'), "File should not exist") @dir.touch('tmpfile1') assert_equal(1, @dir.file_count(), "directory should have one file") @dir.touch('tmpfile2') assert_equal(2, @dir.file_count(), "directory should have two files") assert(@dir.exists?('tmpfile1'), "'tmpfile1' should exist") @dir.delete('tmpfile1') assert(! @dir.exists?('tmpfile1'), "'tmpfile1' should no longer exist") assert_equal(1, @dir.file_count(), "directory should have one file") end def test_rename @dir.touch("from") assert(@dir.exists?('from'), "File should exist") assert(! @dir.exists?('to'), "File should not exist") cnt_before = @dir.file_count() @dir.rename('from', 'to') cnt_after = @dir.file_count() assert_equal(cnt_before, cnt_after, "the number of files shouldn't have changed") assert(@dir.exists?('to'), "File should now exist") assert(! @dir.exists?('from'), "File should no longer exist") end end ferret-0.11.8.6/test/unit/ts_analysis.rb0000755000004100000410000000013612476264460020116 0ustar www-datawww-datarequire File.join(File.dirname(__FILE__), "../test_helper.rb") load_test_dir('unit/analysis') ferret-0.11.8.6/test/unit/analysis/0000755000004100000410000000000012476264460017060 5ustar www-datawww-dataferret-0.11.8.6/test/unit/analysis/tc_token_stream.rb0000644000004100000410000006563612476264460022606 0ustar www-datawww-data# encoding: utf-8 require File.dirname(__FILE__) + "/../../test_helper" puts "Loading once" class TokenTest < Test::Unit::TestCase include Ferret::Analysis def test_token t = Token.new("text", 1, 2, 3) assert_equal("text", t.text) assert_equal(1, t.start) assert_equal(2, t.end) assert_equal(3, t.pos_inc) t.text = "yada yada yada" t.start = 11 t.end = 12 t.pos_inc = 13 assert_equal("yada yada yada", t.text) assert_equal(11, t.start) assert_equal(12, t.end) assert_equal(13, t.pos_inc) t = Token.new("text", 1, 2) assert_equal(1, t.pos_inc) end end class AsciiLetterTokenizerTest < Test::Unit::TestCase include Ferret::Analysis def test_letter_tokenizer() input = 'DBalmain@gmail.com is My e-mail 523@#$ ADDRESS. 23#!$' t = AsciiLetterTokenizer.new(input) assert_equal(Token.new("DBalmain", 0, 8), t.next()) assert_equal(Token.new("gmail", 9, 14), t.next()) assert_equal(Token.new("com", 15, 18), t.next()) assert_equal(Token.new("is", 19, 21), t.next()) assert_equal(Token.new("My", 22, 24), t.next()) assert_equal(Token.new("e", 25, 26), t.next()) assert_equal(Token.new("mail", 27, 31), t.next()) assert_equal(Token.new("ADDRESS", 39, 46), t.next()) assert(! t.next()) t.text = "one_two three" assert_equal(Token.new("one", 0, 3), t.next()) assert_equal(Token.new("two", 4, 7), t.next()) assert_equal(Token.new("three", 8, 13), t.next()) assert(! t.next()) t = AsciiLowerCaseFilter.new(AsciiLetterTokenizer.new(input)) assert_equal(Token.new("dbalmain", 0, 8), t.next()) assert_equal(Token.new("gmail", 9, 14), t.next()) assert_equal(Token.new("com", 15, 18), t.next()) assert_equal(Token.new("is", 19, 21), t.next()) assert_equal(Token.new("my", 22, 24), t.next()) assert_equal(Token.new("e", 25, 26), t.next()) assert_equal(Token.new("mail", 27, 31), t.next()) assert_equal(Token.new("address", 39, 46), t.next()) assert(! t.next()) end end class LetterTokenizerTest < Test::Unit::TestCase include Ferret::Analysis def test_letter_tokenizer() input = 'DBalmän@gmail.com is My e-mail 52 #$ address. 23#!$ ÁÄGÇ®ÊËÌ¯ÚØÃ¬ÖÎÍ' t = LetterTokenizer.new(input) assert_equal(Token.new('DBalmän', 0, 8), t.next) assert_equal(Token.new('gmail', 9, 14), t.next) assert_equal(Token.new('com', 15, 18), t.next) assert_equal(Token.new('is', 19, 21), t.next) assert_equal(Token.new('My', 22, 24), t.next) assert_equal(Token.new('e', 25, 26), t.next) assert_equal(Token.new('mail', 27, 31), t.next) assert_equal(Token.new('address', 40, 47), t.next) assert_equal(Token.new('ÁÄGÇ', 55, 62), t.next) assert_equal(Token.new('ÊËÌ', 64, 70), t.next) assert_equal(Token.new('ÚØÃ', 72, 78), t.next) assert_equal(Token.new('ÖÎÍ', 80, 86), t.next) assert(! t.next()) t.text = "one_two three" assert_equal(Token.new("one", 0, 3), t.next()) assert_equal(Token.new("two", 4, 7), t.next()) assert_equal(Token.new("three", 8, 13), t.next()) assert(! t.next()) t = LowerCaseFilter.new(LetterTokenizer.new(input)) assert_equal(Token.new('dbalmän', 0, 8), t.next) assert_equal(Token.new('gmail', 9, 14), t.next) assert_equal(Token.new('com', 15, 18), t.next) assert_equal(Token.new('is', 19, 21), t.next) assert_equal(Token.new('my', 22, 24), t.next) assert_equal(Token.new('e', 25, 26), t.next) assert_equal(Token.new('mail', 27, 31), t.next) assert_equal(Token.new('address', 40, 47), t.next) assert_equal(Token.new('áägç', 55, 62), t.next) assert_equal(Token.new('êëì', 64, 70), t.next) assert_equal(Token.new('úøã', 72, 78), t.next) assert_equal(Token.new('öîí', 80, 86), t.next) assert(! t.next()) t = LetterTokenizer.new(input, true) assert_equal(Token.new('dbalmän', 0, 8), t.next) assert_equal(Token.new('gmail', 9, 14), t.next) assert_equal(Token.new('com', 15, 18), t.next) assert_equal(Token.new('is', 19, 21), t.next) assert_equal(Token.new('my', 22, 24), t.next) assert_equal(Token.new('e', 25, 26), t.next) assert_equal(Token.new('mail', 27, 31), t.next) assert_equal(Token.new('address', 40, 47), t.next) assert_equal(Token.new('áägç', 55, 62), t.next) assert_equal(Token.new('êëì', 64, 70), t.next) assert_equal(Token.new('úøã', 72, 78), t.next) assert_equal(Token.new('öîí', 80, 86), t.next) assert(! t.next()) end end if (/utf-8/i =~ Ferret.locale) class AsciiWhiteSpaceTokenizerTest < Test::Unit::TestCase include Ferret::Analysis def test_whitespace_tokenizer() input = 'DBalmain@gmail.com is My e-mail 52 #$ ADDRESS. 23#!$' t = AsciiWhiteSpaceTokenizer.new(input) assert_equal(Token.new('DBalmain@gmail.com', 0, 18), t.next) assert_equal(Token.new('is', 19, 21), t.next) assert_equal(Token.new('My', 22, 24), t.next) assert_equal(Token.new('e-mail', 25, 31), t.next) assert_equal(Token.new('52', 32, 34), t.next) assert_equal(Token.new('#$', 37, 39), t.next) assert_equal(Token.new('ADDRESS.', 40, 48), t.next) assert_equal(Token.new('23#!$', 49, 54), t.next) assert(! t.next()) t.text = "one_two three" assert_equal(Token.new("one_two", 0, 7), t.next()) assert_equal(Token.new("three", 8, 13), t.next()) assert(! t.next()) t = AsciiLowerCaseFilter.new(AsciiWhiteSpaceTokenizer.new(input)) assert_equal(Token.new('dbalmain@gmail.com', 0, 18), t.next) assert_equal(Token.new('is', 19, 21), t.next) assert_equal(Token.new('my', 22, 24), t.next) assert_equal(Token.new('e-mail', 25, 31), t.next) assert_equal(Token.new('52', 32, 34), t.next) assert_equal(Token.new('#$', 37, 39), t.next) assert_equal(Token.new('address.', 40, 48), t.next) assert_equal(Token.new('23#!$', 49, 54), t.next) assert(! t.next()) end end class WhiteSpaceTokenizerTest < Test::Unit::TestCase include Ferret::Analysis def test_whitespace_tokenizer() input = 'DBalmän@gmail.com is My e-mail 52 #$ address. 23#!$ ÁÄGÇ®ÊËÌ¯ÚØÃ¬ÖÎÍ' t = WhiteSpaceTokenizer.new(input) assert_equal(Token.new('DBalmän@gmail.com', 0, 18), t.next) assert_equal(Token.new('is', 19, 21), t.next) assert_equal(Token.new('My', 22, 24), t.next) assert_equal(Token.new('e-mail', 25, 31), t.next) assert_equal(Token.new('52', 32, 34), t.next) assert_equal(Token.new('#$', 37, 39), t.next) assert_equal(Token.new('address.', 40, 48), t.next) assert_equal(Token.new('23#!$', 49, 54), t.next) assert_equal(Token.new('ÁÄGÇ®ÊËÌ¯ÚØÃ¬ÖÎÍ', 55, 86), t.next) assert(! t.next()) t.text = "one_two three" assert_equal(Token.new("one_two", 0, 7), t.next()) assert_equal(Token.new("three", 8, 13), t.next()) assert(! t.next()) t = LowerCaseFilter.new(WhiteSpaceTokenizer.new(input)) assert_equal(Token.new('dbalmän@gmail.com', 0, 18), t.next) assert_equal(Token.new('is', 19, 21), t.next) assert_equal(Token.new('my', 22, 24), t.next) assert_equal(Token.new('e-mail', 25, 31), t.next) assert_equal(Token.new('52', 32, 34), t.next) assert_equal(Token.new('#$', 37, 39), t.next) assert_equal(Token.new('address.', 40, 48), t.next) assert_equal(Token.new('23#!$', 49, 54), t.next) assert_equal(Token.new('áägç®êëì¯úøã¬öîí', 55, 86), t.next) assert(! t.next()) t = WhiteSpaceTokenizer.new(input, true) assert_equal(Token.new('dbalmän@gmail.com', 0, 18), t.next) assert_equal(Token.new('is', 19, 21), t.next) assert_equal(Token.new('my', 22, 24), t.next) assert_equal(Token.new('e-mail', 25, 31), t.next) assert_equal(Token.new('52', 32, 34), t.next) assert_equal(Token.new('#$', 37, 39), t.next) assert_equal(Token.new('address.', 40, 48), t.next) assert_equal(Token.new('23#!$', 49, 54), t.next) assert_equal(Token.new('áägç®êëì¯úøã¬öîí', 55, 86), t.next) assert(! t.next()) end end if (/utf-8/i =~ Ferret.locale) class AsciiStandardTokenizerTest < Test::Unit::TestCase include Ferret::Analysis def test_standard_tokenizer() input = 'DBalmain@gmail.com is My e-mail 52 #$ Address. 23#!$ http://www.google.com/results/ T.N.T. 123-1235-ASD-1234' t = AsciiStandardTokenizer.new(input) assert_equal(Token.new('DBalmain@gmail.com', 0, 18), t.next) assert_equal(Token.new('is', 19, 21), t.next) assert_equal(Token.new('My', 22, 24), t.next) assert_equal(Token.new('e-mail', 25, 31), t.next) assert_equal(Token.new('52', 32, 34), t.next) assert_equal(Token.new('Address', 40, 47), t.next) assert_equal(Token.new('23', 49, 51), t.next) assert_equal(Token.new('www.google.com/results', 55, 85), t.next) assert_equal(Token.new('TNT', 86, 91), t.next) assert_equal(Token.new('123-1235-ASD-1234', 93, 110), t.next) assert(! t.next()) t.text = "one_two three" assert_equal(Token.new("one_two", 0, 7), t.next()) assert_equal(Token.new("three", 8, 13), t.next()) assert(! t.next()) t = AsciiLowerCaseFilter.new(AsciiStandardTokenizer.new(input)) assert_equal(Token.new('dbalmain@gmail.com', 0, 18), t.next) assert_equal(Token.new('is', 19, 21), t.next) assert_equal(Token.new('my', 22, 24), t.next) assert_equal(Token.new('e-mail', 25, 31), t.next) assert_equal(Token.new('52', 32, 34), t.next) assert_equal(Token.new('address', 40, 47), t.next) assert_equal(Token.new('23', 49, 51), t.next) assert_equal(Token.new('www.google.com/results', 55, 85), t.next) assert_equal(Token.new('tnt', 86, 91), t.next) assert_equal(Token.new('123-1235-asd-1234', 93, 110), t.next) assert(! t.next()) end end class StandardTokenizerTest < Test::Unit::TestCase include Ferret::Analysis def test_standard_tokenizer() input = 'DBalmán@gmail.com is My e-mail 52 #$ Address. 23#!$ http://www.google.com/res_345/ T.N.T. 123-1235-ASD-1234 23#!$ ÁÄGÇ®ÊËÌ¯ÚØÃ¬ÖÎÍ' t = StandardTokenizer.new(input) assert_equal(Token.new('DBalmán@gmail.com', 0, 18), t.next) assert_equal(Token.new('is', 19, 21), t.next) assert_equal(Token.new('My', 22, 24), t.next) assert_equal(Token.new('e-mail', 25, 31), t.next) assert_equal(Token.new('52', 32, 34), t.next) assert_equal(Token.new('Address', 40, 47), t.next) assert_equal(Token.new('23', 49, 51), t.next) assert_equal(Token.new('www.google.com/res_345', 55, 85), t.next) assert_equal(Token.new('TNT', 86, 91), t.next) assert_equal(Token.new('123-1235-ASD-1234', 93, 110), t.next) assert_equal(Token.new('23', 111, 113), t.next) assert_equal(Token.new('ÁÄGÇ', 117, 124), t.next) assert_equal(Token.new('ÊËÌ', 126, 132), t.next) assert_equal(Token.new('ÚØÃ', 134, 140), t.next) assert_equal(Token.new('ÖÎÍ', 142, 148), t.next) assert(! t.next()) t.text = "one_two three" assert_equal(Token.new("one_two", 0, 7), t.next()) assert_equal(Token.new("three", 8, 13), t.next()) assert(! t.next()) t = LowerCaseFilter.new(StandardTokenizer.new(input)) assert_equal(Token.new('dbalmán@gmail.com', 0, 18), t.next) assert_equal(Token.new('is', 19, 21), t.next) assert_equal(Token.new('my', 22, 24), t.next) assert_equal(Token.new('e-mail', 25, 31), t.next) assert_equal(Token.new('52', 32, 34), t.next) assert_equal(Token.new('address', 40, 47), t.next) assert_equal(Token.new('23', 49, 51), t.next) assert_equal(Token.new('www.google.com/res_345', 55, 85), t.next) assert_equal(Token.new('tnt', 86, 91), t.next) assert_equal(Token.new('123-1235-asd-1234', 93, 110), t.next) assert_equal(Token.new('23', 111, 113), t.next) assert_equal(Token.new('áägç', 117, 124), t.next) assert_equal(Token.new('êëì', 126, 132), t.next) assert_equal(Token.new('úøã', 134, 140), t.next) assert_equal(Token.new('öîí', 142, 148), t.next) input = "e-mail 123-1235-asd-1234 http://www.davebalmain.com/trac-site/" t = HyphenFilter.new(StandardTokenizer.new(input)) assert_equal(Token.new('email', 0, 6), t.next) assert_equal(Token.new('e', 0, 1, 0), t.next) assert_equal(Token.new('mail', 2, 6, 1), t.next) assert_equal(Token.new('123-1235-asd-1234', 7, 24), t.next) assert_equal(Token.new('www.davebalmain.com/trac-site', 25, 61), t.next) assert(! t.next()) end end if (/utf-8/i =~ Ferret.locale) class RegExpTokenizerTest < Test::Unit::TestCase include Ferret::Analysis ALPHA = /[[:alpha:]_-]+/ APOSTROPHE = /#{ALPHA}('#{ALPHA})+/ ACRONYM = /#{ALPHA}\.(#{ALPHA}\.)+/ ACRONYM_WORD = /^#{ACRONYM}$/ APOSTROPHE_WORD = /^#{APOSTROPHE}$/ def test_reg_exp_tokenizer() input = 'DBalmain@gmail.com is My e-mail 52 #$ Address. 23#!$ http://www.google.com/RESULT_3.html T.N.T. 123-1235-ASD-1234 23 Rob\'s' t = RegExpTokenizer.new(input) assert_equal(Token.new('DBalmain@gmail.com', 0, 18), t.next) assert_equal(Token.new('is', 19, 21), t.next) assert_equal(Token.new('My', 22, 24), t.next) assert_equal(Token.new('e-mail', 25, 31), t.next) assert_equal(Token.new('52', 32, 34), t.next) assert_equal(Token.new('Address', 40, 47), t.next) assert_equal(Token.new('23', 49, 51), t.next) assert_equal(Token.new('http://www.google.com/RESULT_3.html', 55, 90), t.next) assert_equal(Token.new('T.N.T.', 91, 97), t.next) assert_equal(Token.new('123-1235-ASD-1234', 98, 115), t.next) assert_equal(Token.new('23', 116, 118), t.next) assert_equal(Token.new('Rob\'s', 119, 124), t.next) assert(! t.next()) t.text = "one_two three" assert_equal(Token.new("one_two", 0, 7), t.next()) assert_equal(Token.new("three", 8, 13), t.next()) assert(! t.next()) t = LowerCaseFilter.new(RegExpTokenizer.new(input)) t2 = LowerCaseFilter.new(RegExpTokenizer.new(input, /\w{2,}/)) assert_equal(Token.new('dbalmain@gmail.com', 0, 18), t.next) assert_equal(Token.new('is', 19, 21), t.next) assert_equal(Token.new('my', 22, 24), t.next) assert_equal(Token.new('e-mail', 25, 31), t.next) assert_equal(Token.new('52', 32, 34), t.next) assert_equal(Token.new('address', 40, 47), t.next) assert_equal(Token.new('23', 49, 51), t.next) assert_equal(Token.new('http://www.google.com/result_3.html', 55, 90), t.next) assert_equal(Token.new('t.n.t.', 91, 97), t.next) assert_equal(Token.new('123-1235-asd-1234', 98, 115), t.next) assert_equal(Token.new('23', 116, 118), t.next) assert_equal(Token.new('rob\'s', 119, 124), t.next) assert(! t.next()) assert_equal(Token.new('dbalmain', 0, 8), t2.next) assert_equal(Token.new('gmail', 9, 14), t2.next) assert_equal(Token.new('com', 15, 18), t2.next) assert_equal(Token.new('is', 19, 21), t2.next) assert_equal(Token.new('my', 22, 24), t2.next) assert_equal(Token.new('mail', 27, 31), t2.next) assert_equal(Token.new('52', 32, 34), t2.next) assert_equal(Token.new('address', 40, 47), t2.next) assert_equal(Token.new('23', 49, 51), t2.next) assert_equal(Token.new('http', 55, 59), t2.next) assert_equal(Token.new('www', 62, 65), t2.next) assert_equal(Token.new('google', 66, 72), t2.next) assert_equal(Token.new('com', 73, 76), t2.next) assert_equal(Token.new('result_3', 77, 85), t2.next) assert_equal(Token.new('html', 86, 90), t2.next) assert_equal(Token.new('123', 98, 101), t2.next) assert_equal(Token.new('1235', 102, 106), t2.next) assert_equal(Token.new('asd', 107, 110), t2.next) assert_equal(Token.new('1234', 111, 115), t2.next) assert_equal(Token.new('23', 116, 118), t2.next) assert_equal(Token.new('rob', 119, 122), t2.next) assert(! t2.next()) t = RegExpTokenizer.new(input) do |str| if str =~ ACRONYM_WORD str.gsub!(/\./, '') elsif str =~ APOSTROPHE_WORD str.gsub!(/'[sS]$/, '') end str end t = LowerCaseFilter.new(t) assert_equal(Token.new('dbalmain@gmail.com', 0, 18), t.next) assert_equal(Token.new('is', 19, 21), t.next) assert_equal(Token.new('my', 22, 24), t.next) assert_equal(Token.new('e-mail', 25, 31), t.next) assert_equal(Token.new('52', 32, 34), t.next) assert_equal(Token.new('address', 40, 47), t.next) assert_equal(Token.new('23', 49, 51), t.next) assert_equal(Token.new('http://www.google.com/result_3.html', 55, 90), t.next) assert_equal(Token.new('tnt', 91, 97), t.next) assert_equal(Token.new('123-1235-asd-1234', 98, 115), t.next) assert_equal(Token.new('23', 116, 118), t.next) assert_equal(Token.new('rob', 119, 124), t.next) assert(! t.next()) end end class MappingFilterTest < Test::Unit::TestCase include Ferret::Analysis def test_mapping_filter() mapping = { ['à','á','â','ã','ä','å','ā','ă'] => 'a', 'æ' => 'ae', ['ď','đ'] => 'd', ['ç','ć','č','ĉ','ċ'] => 'c', ['è','é','ê','ë','ē','ę','ě','ĕ','ė',] => 'e', ['ƒ'] => 'f', ['ĝ','ğ','ġ','ģ'] => 'g', ['ĥ','ħ'] => 'h', ['ì','ì','í','î','ï','ī','ĩ','ĭ'] => 'i', ['į','ı','ij','ĵ'] => 'j', ['ķ','ĸ'] => 'k', ['ł','ľ','ĺ','ļ','ŀ'] => 'l', ['ñ','ń','ň','ņ','ʼn','ŋ'] => 'n', ['ò','ó','ô','õ','ö','ø','ō','ő','ŏ','ŏ'] => 'o', 'œ' => 'oek', 'ą' => 'q', ['ŕ','ř','ŗ'] => 'r', ['ś','š','ş','ŝ','ș'] => 's', ['ť','ţ','ŧ','ț'] => 't', ['ù','ú','û','ü','ū','ů','ű','ŭ','ũ','ų'] => 'u', 'ŵ' => 'w', ['ý','ÿ','ŷ'] => 'y', ['ž','ż','ź'] => 'z' } input = <") end prev = pq.pop() (PQ_STRESS_SIZE - 1).times do curr = pq.pop() assert(prev <= curr, "#{prev} should be less than #{curr}") prev = curr end pq.clear() end def test_pq_block pq = PriorityQueue.new(21) {|a, b| a > b} 100.times do pq.insert("<#{rand(50)}>") end prev = pq.pop() 20.times do curr = pq.pop() assert(prev >= curr, "#{prev} should be greater than #{curr}") prev = curr end assert_equal 0, pq.size end def test_pq_proc pq = PriorityQueue.new({:less_than => lambda {|a, b| a.size > b.size}, :capacity => 21}) 100.times do pq.insert("x" * rand(50)) end prev = pq.pop() 20.times do curr = pq.pop() assert(prev.size >= curr.size, "#{prev} should be greater than #{curr}") prev = curr end assert_equal 0, pq.size end end ferret-0.11.8.6/test/unit/utils/tc_bit_vector.rb0000644000004100000410000001367212476264460021561 0ustar www-datawww-datarequire File.dirname(__FILE__) + "/../../test_helper" class BitVectorTest < Test::Unit::TestCase include Ferret::Utils def test_bv_get_set() bv = BitVector.new assert_equal 0, bv.count bv.set 10 assert bv.get(10) assert bv[10] assert_equal 1, bv.count bv[10] = false assert ! bv[10] bv[10] = true assert bv[10] bv[10] = nil assert ! bv[10] bv[10] = true assert bv[10] bv.unset 10 assert ! bv[10] bv[10] = true assert bv[10] end def test_bv_count() bv = BitVector.new bv.set 10 assert_equal 1, bv.count bv.set 20 assert bv.get(20) assert_equal 2, bv.count bv.set 21 assert bv.get(21) assert_equal 3, bv.count bv.unset 21 assert ! bv.get(21) assert_equal 2, bv.count bv[20] = nil assert ! bv.get(20) assert_equal 1, bv.count (50..100).each {|i| bv.set i } (50..100).each {|i| assert bv[i] } assert bv.get(10) assert_equal 52, bv.count bv.clear assert_equal 0, bv.count (50..100).each {|i| assert ! bv[i] } assert ! bv.get(10) end def test_bv_eql_hash bv1 = BitVector.new bv2 = BitVector.new assert_equal(bv1, bv2) assert_equal(bv1.hash, bv2.hash) bv1.set(10) assert_not_equal(bv1, bv2) assert_not_equal(bv1.hash, bv2.hash) bv2.set(10) assert_equal(bv1, bv2) assert_equal(bv1.hash, bv2.hash) 10.times {|i| bv1.set(i * 31)} assert_not_equal(bv1, bv2) assert_not_equal(bv1.hash, bv2.hash) 10.times {|i| bv2.set(i * 31)} assert_equal(bv1, bv2) assert_equal(bv1.hash, bv2.hash) bv1.clear assert_not_equal(bv1, bv2) assert_not_equal(bv1.hash, bv2.hash) bv2.clear assert_equal(bv1, bv2) assert_equal(bv1.hash, bv2.hash) end BV_COUNT = 500 BV_SIZE = 1000 def test_bv_and bv1 = BitVector.new bv2 = BitVector.new set1 = set2 = count = 0 BV_COUNT.times do |i| bit = rand(BV_SIZE) bv1.set(bit) set1 |= (1 << bit) end BV_COUNT.times do |i| bit = rand(BV_SIZE) bv2.set(bit) bitmask = (1 << bit) if ((set1 & bitmask) > 0) && ((set2 & bitmask) == 0) set2 |= (1 << bit) count += 1 end end and_bv = bv1 & bv2 assert_equal(count, and_bv.count) BV_SIZE.times do |i| assert_equal(((set2 & (1 << i)) > 0), and_bv[i]) end bv2.and! bv1 assert_equal(bv2, and_bv) bv2 = BitVector.new and_bv = bv1 & bv2 assert_equal(bv2, and_bv, "and_bv should be empty") assert_equal(0, and_bv.count) bv1 = BitVector.new bv2 = BitVector.new.not! bv1.set(10) bv1.set(11) bv1.set(20) assert_equal(bv1, bv1 & bv2, "bv anded with empty not bv should be same") end def test_bv_or bv1 = BitVector.new bv2 = BitVector.new set = count = 0 BV_COUNT.times do |i| bit = rand(BV_SIZE) bv1.set(bit) bitmask = (1 << bit) if (set & bitmask) == 0 count += 1 set |= bitmask end end BV_COUNT.times do |i| bit = rand(BV_SIZE) bv2.set(bit) bitmask = (1 << bit) if (set & bitmask) == 0 count += 1 set |= bitmask end end or_bv = bv1 | bv2 assert_equal(count, or_bv.count) BV_SIZE.times do |i| assert_equal(((set & (1 << i)) > 0), or_bv[i]) end bv2.or! bv1 assert_equal(bv2, or_bv) bv2 = BitVector.new or_bv = bv1 | bv2 assert_equal(bv1, or_bv) end def test_bv_xor bv1 = BitVector.new bv2 = BitVector.new set1 = set2 = count = 0 BV_COUNT.times do |i| bit = rand(BV_SIZE) bv1.set(bit) set1 |= (1 << bit) end BV_COUNT.times do |i| bit = rand(BV_SIZE) bv2.set(bit) set2 |= (1 << bit) end bitmask = 1 set1 ^= set2 BV_SIZE.times do |i| count += 1 if (set1 & bitmask) > 0 bitmask <<= 1 end xor_bv = bv1 ^ bv2 BV_SIZE.times do |i| assert_equal(((set1 & (1 << i)) > 0), xor_bv[i]) end assert_equal(count, xor_bv.count) bv2.xor! bv1 assert_equal(bv2, xor_bv) bv2 = BitVector.new xor_bv = bv1 ^ bv2 assert_equal(bv1, xor_bv) end def test_bv_not bv = BitVector.new [1, 5, 25, 41, 97, 185].each {|i| bv.set(i)} not_bv = ~bv assert_equal(bv.count, not_bv.count) 200.times {|i| assert(bv[i] != not_bv[i])} not_bv.not! assert_equal(bv, not_bv) end SCAN_SIZE = 200 SCAN_INC = 97 def test_scan bv = BitVector.new SCAN_SIZE.times {|i| bv.set(i * SCAN_INC)} not_bv = ~bv SCAN_SIZE.times do |i| assert_equal(i * SCAN_INC, bv.next_from((i - 1) * SCAN_INC + 1)) assert_equal(i * SCAN_INC, not_bv.next_unset_from((i - 1) * SCAN_INC + 1)) end assert_equal(-1, bv.next_from((SCAN_SIZE - 1) * SCAN_INC + 1)) assert_equal(-1, not_bv.next_unset_from((SCAN_SIZE - 1) * SCAN_INC + 1)) bit = 0 bv.each {|i| assert_equal(bit, i); bit += SCAN_INC } assert_equal(bit, SCAN_SIZE * SCAN_INC) bit = 0 not_bv.each {|i| assert_equal(bit, i); bit += SCAN_INC } assert_equal(bit, SCAN_SIZE * SCAN_INC) bv.reset_scan not_bv.reset_scan SCAN_SIZE.times do |i| assert_equal(i * SCAN_INC, bv.next) assert_equal(i * SCAN_INC, not_bv.next_unset) end assert_equal(-1, bv.next) assert_equal(-1, not_bv.next_unset) bv.clear SCAN_SIZE.times {|i| bv.set(i)} not_bv = ~bv SCAN_SIZE.times do |i| assert_equal(i, bv.next) assert_equal(i, not_bv.next_unset) end assert_equal(-1, bv.next) assert_equal(-1, not_bv.next_unset) bit = 0 bv.each {|i| assert_equal(bit, i); bit += 1 } assert_equal(bit, SCAN_SIZE) bit = 0 not_bv.each {|i| assert_equal(bit, i); bit += 1 } assert_equal(bit, SCAN_SIZE) end def test_to_a bv = BitVector.new ary = (1..100).collect { rand(1000) }.sort.uniq ary.each {|i| bv.set(i)} assert_equal(ary, bv.to_a) end end ferret-0.11.8.6/test/unit/utils/tc_number_tools.rb0000755000004100000410000001102712476264460022124 0ustar www-datawww-datarequire File.dirname(__FILE__) + "/../../test_helper" require 'ferret/number_tools' class NumberToolsTest < Test::Unit::TestCase include Ferret::Utils def test_to_i_lex_near_zero() (-10..10).each do |num| assert(num.to_s_lex > (num-1).to_s_lex, "Strings should sort correctly but " + "#{num.to_s_lex} <= #{(num-1).to_s_lex}") assert_equal(num, num.to_s_lex.to_i_lex) end end def test_to_i_pad_near_zero() (1..10).each do |num| assert(num.to_s_pad(3) > (num-1).to_s_pad(3), "Strings should sort correctly but " + "#{num.to_s_pad(3)} <= #{(num-1).to_s_pad(3)}") assert_equal(num, num.to_s_pad(3).to_i) end end def test_to_i_lex_larger_numbers 100.times do num1 = rand(10000000000000000000000000000000000) num2 = rand(10000000000000000000000000000000000) num1 *= -1 if rand(2) == 0 num2 *= -1 if rand(2) == 0 assert_equal(num1, num1.to_s_lex.to_i_lex) assert_equal(num2, num2.to_s_lex.to_i_lex) assert_equal(num1 < num2, num1.to_s_lex < num2.to_s_lex, "Strings should sort correctly but " + "#{num1} < #{num2} == #{num1 < num2} but " + "#{num1.to_s_lex} < #{num2.to_s_lex} == " + "#{num1.to_s_lex < num2.to_s_lex}") end end def test_to_i_pad 100.times do num1 = rand(10000000000000000000000000000000000) num2 = rand(10000000000000000000000000000000000) assert_equal(num1, num1.to_s_pad(35).to_i) assert_equal(num2, num2.to_s_pad(35).to_i) assert_equal(num1 < num2, num1.to_s_pad(35) < num2.to_s_pad(35), "Strings should sort correctly but " + "#{num1} < #{num2} == #{num1 < num2} but " + "#{num1.to_s_pad(35)} < #{num2.to_s_pad(35)} == " + "#{num1.to_s_pad(35) < num2.to_s_pad(35)}") end end def test_time_to_s_lex t_num = Time.now.to_i - 365*24*60*60 # prevent range error 10.times do t1 = Time.now - rand(t_num) t2 = Time.now - rand(t_num) assert_equal(t1.to_s, t1.to_s_lex(:second).to_time_lex.to_s) assert_equal(t2.to_s, t2.to_s_lex(:second).to_time_lex.to_s) [:year, :month, :day, :hour, :minute, :second, :millisecond].each do |prec| t1_x = t1.to_s_lex(prec).to_time_lex t2_x = t2.to_s_lex(prec).to_time_lex assert_equal(t1_x < t2_x, t1.to_s_lex(prec) < t2.to_s_lex(prec), "Strings should sort correctly but " + "#{t1_x} < #{t2_x} == #{t1_x < t2_x} but " + "#{t1.to_s_lex(prec)} < #{t2.to_s_lex(prec)} == " + "#{t1.to_s_lex(prec) < t2.to_s_lex(prec)}") end end end def test_date_to_s_lex 10.times do d1 = Date.civil(rand(2200), rand(12) + 1, rand(28) + 1) d2 = Date.civil(rand(2200), rand(12) + 1, rand(28) + 1) assert_equal(d1.to_s, d1.to_s_lex(:day).to_date_lex.to_s) assert_equal(d2.to_s, d2.to_s_lex(:day).to_date_lex.to_s) [:year, :month, :day].each do |prec| d1_x = d1.to_s_lex(prec).to_date_lex d2_x = d2.to_s_lex(prec).to_date_lex assert_equal(d1_x < d2_x, d1.to_s_lex(prec) < d2.to_s_lex(prec), "Strings should sort correctly but " + "#{d1_x} < #{d2_x} == #{d1_x < d2_x} but " + "#{d1.to_s_lex(prec)} < #{d2.to_s_lex(prec)} == " + "#{d1.to_s_lex(prec) < d2.to_s_lex(prec)}") end end end def test_date_time_to_s_lex 10.times do d1 = "#{rand(600) + 1600}-#{rand(12)+1}-#{rand(28)+1} " + "#{rand(24)}:#{rand(60)}:#{rand(60)}" d2 = "#{rand(600) + 1600}-#{rand(12)+1}-#{rand(28)+1} " + "#{rand(24)}:#{rand(60)}:#{rand(60)}" d1 = DateTime.strptime(d1, "%Y-%m-%d %H:%M:%S") d2 = DateTime.strptime(d2, "%Y-%m-%d %H:%M:%S") assert_equal(d1.to_s, d1.to_s_lex(:second).to_date_time_lex.to_s) assert_equal(d2.to_s, d2.to_s_lex(:second).to_date_time_lex.to_s) [:year, :month, :day, :hour, :minute, :second].each do |prec| d1_x = d1.to_s_lex(prec).to_date_lex d2_x = d2.to_s_lex(prec).to_date_lex assert_equal(d1_x < d2_x, d1.to_s_lex(prec) < d2.to_s_lex(prec), "Strings should sort correctly but " + "#{d1_x} < #{d2_x} == #{d1_x < d2_x} but " + "#{d1.to_s_lex(prec)} < #{d2.to_s_lex(prec)} == " + "#{d1.to_s_lex(prec) < d2.to_s_lex(prec)}") end end end end ferret-0.11.8.6/test/unit/ts_query_parser.rb0000644000004100000410000000014212476264460021006 0ustar www-datawww-datarequire File.join(File.dirname(__FILE__), "../test_helper.rb") load_test_dir('unit/query_parser') ferret-0.11.8.6/test/unit/ts_index.rb0000755000004100000410000000013312476264460017377 0ustar www-datawww-datarequire File.join(File.dirname(__FILE__), "../test_helper.rb") load_test_dir('unit/index') ferret-0.11.8.6/test/unit/tc_document.rb0000644000004100000410000000370612476264460020074 0ustar www-datawww-datarequire File.dirname(__FILE__) + "/../test_helper" class DocumentTest < Test::Unit::TestCase def test_field f = Ferret::Field.new assert_equal(0, f.size) assert_equal(1.0, f.boost) f2 = Ferret::Field.new assert_equal(f, f2) f << "section0" assert_equal(1, f.size) assert_equal(1.0, f.boost) assert_equal("section0", f[0]) assert_not_equal(f, f2) f << "section1" assert_equal(2, f.size) assert_equal(1.0, f.boost) assert_equal("section0", f[0]) assert_equal("section1", f[1]) assert_equal('["section0", "section1"]', f.to_s) assert_not_equal(f, f2) f2 += f assert_equal(f, f2) f.boost = 4.0 assert_not_equal(f, f2) assert_equal('["section0", "section1"]^4.0', f.to_s) f2.boost = 4.0 assert_equal(f, f2) f3 = Ferret::Field.new(["section0", "section1"], 4.0) assert_equal(f, f3) end def test_document d = Ferret::Document.new d[:name] = Ferret::Field.new d[:name] << "section0" d[:name] << "section1" assert_equal(1, d.size) assert_equal(1.0, d.boost) assert_equal(%( Document { :name => ["section0", "section1"] }).strip, d.to_s) d.boost = 123.0 d[:name] << "section2" d[:name].boost = 321.0 assert_equal(123.0, d.boost) assert_equal(321.0, d[:name].boost) assert_equal(%( Document { :name => ["section0", "section1", "section2"]^321.0 }^123.0).strip, d.to_s) d[:title] = "Shawshank Redemption" d[:actors] = ["Tim Robbins", "Morgan Freeman"] assert_equal(3, d.size) assert_equal(%( Document { :actors => ["Tim Robbins", "Morgan Freeman"] :name => ["section0", "section1", "section2"]^321.0 :title => "Shawshank Redemption" }^123.0).strip, d.to_s) d2 = Ferret::Document.new(123.0) d2[:name] = Ferret::Field.new(["section0", "section1", "section2"], 321.0) d2[:title] = "Shawshank Redemption" d2[:actors] = ["Tim Robbins", "Morgan Freeman"] assert_equal(d, d2) end end ferret-0.11.8.6/test/unit/query_parser/0000755000004100000410000000000012476264460017756 5ustar www-datawww-dataferret-0.11.8.6/test/unit/query_parser/tc_query_parser.rb0000644000004100000410000002252612476264460023521 0ustar www-datawww-datarequire File.dirname(__FILE__) + "/../../test_helper" class QueryParserTest < Test::Unit::TestCase include Ferret::Analysis def test_strings() parser = Ferret::QueryParser.new(:default_field => "xxx", :fields => ["xxx", "field", "f1", "f2"], :tokenized_fields => ["xxx", "f1", "f2"]) pairs = [ ['', ''], ['*:word', 'word field:word f1:word f2:word'], ['word', 'word'], ['field:word', 'field:word'], ['"word1 word2 word#"', '"word1 word2 word"'], ['"word1 %%% word3"', '"word1 <> word3"~1'], ['field:"one two three"', 'field:"one two three"'], ['field:"one %%% three"', 'field:"one %%% three"'], ['f1:"one %%% three"', 'f1:"one <> three"~1'], ['field:"one <> three"', 'field:"one <> three"'], ['field:"one <> three <>"', 'field:"one <> three"'], ['field:"one <> <> <> three <>"', 'field:"one <> <> <> three"'], ['field:"one <> 222 <> three|four|five <>"', 'field:"one <> 222 <> three|four|five"'], ['field:"on1|tw2 THREE|four|five six|seven"', 'field:"on1|tw2 THREE|four|five six|seven"'], ['field:"testing|trucks"', 'field:"testing|trucks"'], ['[aaa bbb]', '[aaa bbb]'], ['{aaa bbb]', '{aaa bbb]'], ['field:[aaa bbb}', 'field:[aaa bbb}'], ['{aaa bbb}', '{aaa bbb}'], ['{aaa>', '{aaa>'], ['[aaa>', '[aaa>'], ['field:aaa', '{aaa>'], ['>=aaa', '[aaa>'], ['', '[a>'], ['field:<=aaa', 'field: asdf|asdf"~4', 'field:"asdf <> asdf|asdf"~4'], ['"one two three four five"~5', '"one two three four five"~5'], ['ab?de', 'ab?de'], ['ab*de', 'ab*de'], ['asdf?*?asd*dsf?asfd*asdf?', 'asdf?*?asd*dsf?asfd*asdf?'], ['field:a* AND field:(b*)', '+field:a* +field:b*'], ['field:abc~ AND field:(b*)', '+field:abc~ +field:b*'], ['asdf?*?asd*dsf?asfd*asdf?^20.0', 'asdf?*?asd*dsf?asfd*asdf?^20.0'], ['*:xxx', 'xxx field:xxx f1:xxx f2:xxx'], ['f1|f2:xxx', 'f1:xxx f2:xxx'], ['*:asd~0.2', 'asd~0.2 field:asd~0.2 f1:asd~0.2 f2:asd~0.2'], ['f1|f2:asd~0.2', 'f1:asd~0.2 f2:asd~0.2'], ['*:a?d*^20.0', '(a?d* field:a?d* f1:a?d* f2:a?d*)^20.0'], ['f1|f2:a?d*^20.0', '(f1:a?d* f2:a?d*)^20.0'], ['*:"asdf <> xxx|yyy"', '"asdf <> xxx|yyy" field:"asdf <> xxx|yyy" f1:"asdf <> xxx|yyy" f2:"asdf <> xxx|yyy"'], ['f1|f2:"asdf <> xxx|yyy"', 'f1:"asdf <> xxx|yyy" f2:"asdf <> xxx|yyy"'], ['f1|f2:"asdf <> do|yyy"', 'f1:"asdf <> yyy" f2:"asdf <> yyy"'], ['f1|f2:"do|cat"', 'f1:cat f2:cat'], ['*:[bbb xxx]', '[bbb xxx] field:[bbb xxx] f1:[bbb xxx] f2:[bbb xxx]'], ['f1|f2:[bbb xxx]', 'f1:[bbb xxx] f2:[bbb xxx]'], ['*:(xxx AND bbb)', '+(xxx field:xxx f1:xxx f2:xxx) +(bbb field:bbb f1:bbb f2:bbb)'], ['f1|f2:(xxx AND bbb)', '+(f1:xxx f2:xxx) +(f1:bbb f2:bbb)'], ['asdf?*?asd*dsf?asfd*asdf?^20.0', 'asdf?*?asd*dsf?asfd*asdf?^20.0'], ['"onewordphrase"', 'onewordphrase'], ["who'd", "who'd"] ] pairs.each do |query_str, expected| assert_equal(expected, parser.parse(query_str).to_s("xxx")) end end def test_qp_with_standard_analyzer() parser = Ferret::QueryParser.new(:default_field => "xxx", :fields => ["xxx", "key"], :analyzer => StandardAnalyzer.new) pairs = [ ['key:1234', 'key:1234'], ['key:(1234 and Dave)', 'key:1234 key:dave'], ['key:(1234)', 'key:1234'], ['and the but they with', ''] ] pairs.each do |query_str, expected| assert_equal(expected, parser.parse(query_str).to_s("xxx")) end end def test_qp_changing_fields() parser = Ferret::QueryParser.new(:default_field => "xxx", :fields => ["xxx", "key"], :analyzer => WhiteSpaceAnalyzer.new) assert_equal('word key:word', parser.parse("*:word").to_s("xxx")) parser.fields = ["xxx", "one", "two", "three"] assert_equal('word one:word two:word three:word', parser.parse("*:word").to_s("xxx")) assert_equal('three:word four:word', parser.parse("three:word four:word").to_s("xxx")) end def test_qp_allow_any_field() parser = Ferret::QueryParser.new(:default_field => "xxx", :fields => ["xxx", "key"], :analyzer => WhiteSpaceAnalyzer.new, :validate_fields => true) assert_equal('key:word', parser.parse("key:word song:word").to_s("xxx")) assert_equal('word key:word', parser.parse("*:word").to_s("xxx")) parser = Ferret::QueryParser.new(:default_field => "xxx", :fields => ["xxx", "key"], :analyzer => WhiteSpaceAnalyzer.new) assert_equal('key:word song:word', parser.parse("key:word song:word").to_s("xxx")) assert_equal('word key:word', parser.parse("*:word").to_s("xxx")) end def do_test_query_parse_exception_raised(str) parser = Ferret::QueryParser.new(:default_field => "xxx", :fields => ["f1", "f2", "f3"], :handle_parse_errors => false) assert_raise(Ferret::QueryParser::QueryParseException, str + " should have failed") do parser.parse(str) end end def test_or_default parser = Ferret::QueryParser.new(:default_field => :*, :fields => [:x, :y], :or_default => false, :analyzer => StandardAnalyzer.new) pairs = [ ['word', 'x:word y:word'], ['word1 word2', '+(x:word1 y:word1) +(x:word2 y:word2)'] ] pairs.each do |query_str, expected| assert_equal(expected, parser.parse(query_str).to_s("")) end end def test_prefix_query parser = Ferret::QueryParser.new(:default_field => "xxx", :fields => ["xxx"], :analyzer => StandardAnalyzer.new) assert_equal(Ferret::Search::PrefixQuery, parser.parse("asdg*").class) assert_equal(Ferret::Search::WildcardQuery, parser.parse("a?dg*").class) assert_equal(Ferret::Search::WildcardQuery, parser.parse("a*dg*").class) assert_equal(Ferret::Search::WildcardQuery, parser.parse("adg*c").class) end def test_bad_queries parser = Ferret::QueryParser.new(:default_field => "xxx", :fields => ["f1", "f2"]) pairs = [ ['::*word', 'word'], ['::*&)(*^&*(', ''], ['::*&one)(*two(*&"', '"one two"~1'], [':', ''], ['[, ]', ''], ['{, }', ''], ['!', ''], ['+', ''], ['~', ''], ['^', ''], ['-', ''], ['|', ''], ['<, >', ''], ['=', ''], ['